Imputation
We generated a plot to see all the missing values in the sample.
#<div style="border: 1px solid #ddd; padding: 5px; overflow-y: scroll; height:400px; overflow-x: scroll; width:100%">
library(dplyr)
library(ggplot2)
vector_variables<-
c("tipo_de_programa_2", "estado_conyugal_2", "edad_al_ing_grupos", "escolaridad_rec", "sus_principal_mod", "freq_cons_sus_prin", "compromiso_biopsicosocial", "tenencia_de_la_vivienda_mod", "num_otras_sus_mod", "numero_de_hijos_mod_rec", "motivodeegreso_mod_imp")
missing.values<-
CONS_C1_df_dup_SEP_2020_women %>%
rowwise %>%
dplyr::mutate_at(.vars = vars(vector_variables),
.funs = ~ifelse(is.na(.), 1, 0)) %>%
dplyr::ungroup() %>%
dplyr::summarise_at(vars(vector_variables),~sum(.))
#t(missing.values)
miss_val_bar<-
melt(missing.values) %>%
mutate(perc=scales::percent(value/nrow(CONS_C1_df_dup_SEP_2020_women))) %>%
arrange(desc(perc))
plot_miss<-
missing.values %>%
data.table::melt() %>% #condicion_ocupacional_corr
dplyr::filter(!variable %in% c("row", "hash_key", "dias_treat_imp_sin_na", "dup")) %>%
dplyr::mutate(perc= value/sum(nrow(CONS_C1_df_dup_SEP_2020_women))) %>%
dplyr::mutate(label_text= paste0("Variable= ",variable,"<br>n= ",value,"<br>",scales::percent(round(perc,3)))) %>%
dplyr::mutate(perc=perc*100) %>%
ggplot() +
geom_bar(aes(x=factor(variable), y=perc,label= label_text), stat = 'identity') +
sjPlot::theme_sjplot()+
# scale_y_continuous(limits=c(0,1), labels=percent)+
theme(axis.text.x = element_text(angle = 90, hjust = 1, size=9))+
labs(x=NULL, y="% of Missing Values", caption=paste0("Nota. Percentage of missing values (n= ",sum(nrow(CONS_C1_df_dup_SEP_2020_women)),")"))
ggplotly(plot_miss, tooltip = c("label_text"))%>% layout(xaxis= list(showticklabels = T), height = 600, width=800) %>% layout(yaxis = list(tickformat='%', range = c(0, 5)))
#</div>
From the figure above, we could see that the Tenure status of households (tenencia_de_la_vivienda_mod) and the Biopsychosocial Involvement (ompromiso_biopsicosocial) had a proportion of missing values, but no greater than 5%. This is why we imputed these values under MAR assumption.
vector_variables_only_for_imputation<-
c("row", "hash_key", "tipo_de_programa_2", "estado_conyugal_2", "edad_al_ing_grupos", "escolaridad_rec", "sus_principal_mod", "freq_cons_sus_prin", "compromiso_biopsicosocial", "tenencia_de_la_vivienda_mod", "num_otras_sus_mod", "numero_de_hijos_mod_rec","motivodeegreso_mod_imp")
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#HACER BASE ESPECIAL QUE CONTENGA UNA VARIABLE DE EDAD DE INICIO DE CONSUMO DE SUSTANCIA PRINCIPAL PARA EQUIPARAR
CONS_C1_df_dup_SEP_2020_women_miss<-
CONS_C1_df_dup_SEP_2020_women %>%
#dplyr::group_by(hash_key) %>%
#dplyr::mutate(rn=row_number()) %>%
#dplyr::ungroup() %>%
#:#:#:#:#:#:#:#:#:#:#:
# ORDINALIZAR LAS VARIABLES ORDINALES:
dplyr::select_(.dots = vector_variables_only_for_imputation) %>%
data.table::data.table()
#CONS_C1_df_dup_SEP_2020 %>% janitor::tabyl(evaluacindelprocesoteraputico)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
library(Amelia)
amelia_fit <- amelia(CONS_C1_df_dup_SEP_2020_women_miss,
#Warning message:
#In amcheck(x = x, m = m, idvars = numopts$idvars, priors = priors, :
#The number of categories in one of the variables marked nominal has greater than 10 categories. Check nominal specification.
m=61,
parallel = "multicore",
idvars="row",
noms= c("tipo_de_programa_2", "estado_conyugal_2", "sus_principal_mod", "tenencia_de_la_vivienda_mod", "numero_de_hijos_mod_rec","motivodeegreso_mod_imp"),
ords= c("edad_al_ing_grupos", "escolaridad_rec", "freq_cons_sus_prin", "compromiso_biopsicosocial", "num_otras_sus_mod"),
cs = "hash_key",
incheck = TRUE)
# Se sacó el servicio de salud porque tiene mucha información: The number of categories in one of the variables marked nominal has greater than 10 categories. Check nominal specification.
#Error in yy %*% unique(na.omit(x.orig[, i])) : non-conformable arguments.
Age at Admission to Treatment (in groups)
We started looking over the missing values in the age at admission (in groups) (n=4).
#On this graph, a y = x line indicates the line of perfect agreement; that is, if the imputation model was a perfect predictor of the true value, all the imputations would fall on this line
no_mostrar=0
if(no_mostrar==1){
res <- {
setTimeLimit(nn_K*500)
ovr_imp_edad_ini_cons<-overimpute(amelia_fit, var = "edad_al_ing_grupos")
}
}
paste0("Users that had more than one treatment with no date of admission: ",CONS_C1_df_dup_SEP_2020_women_miss %>%
dplyr::group_by(hash_key) %>%
dplyr::mutate(na_edad_ing=sum(is.na(edad_al_ing_grupos))) %>%
dplyr::ungroup() %>%
dplyr::filter(na_edad_ing>0) %>%
dplyr::group_by(hash_key) %>%
dplyr::summarise(n=n()) %>% dplyr::filter(n>1) %>% nrow())
## [1] "Users that had more than one treatment with no date of admission: 0"
#Hay poca relación en las imputaciones.
#table(is.na(CONS_C1_df_dup_SEP_2020_women_not_miss$edad_al_ing),exclude=NULL)
edad_al_ing_grupos_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$edad_al_ing_grupos,
amelia_fit$imputations$imp2$edad_al_ing_grupos,
amelia_fit$imputations$imp3$edad_al_ing_grupos,
amelia_fit$imputations$imp4$edad_al_ing_grupos,
amelia_fit$imputations$imp5$edad_al_ing_grupos,
amelia_fit$imputations$imp6$edad_al_ing_grupos,
amelia_fit$imputations$imp7$edad_al_ing_grupos,
amelia_fit$imputations$imp8$edad_al_ing_grupos,
amelia_fit$imputations$imp9$edad_al_ing_grupos,
amelia_fit$imputations$imp10$edad_al_ing_grupos,
amelia_fit$imputations$imp11$edad_al_ing_grupos,
amelia_fit$imputations$imp12$edad_al_ing_grupos,
amelia_fit$imputations$imp13$edad_al_ing_grupos,
amelia_fit$imputations$imp14$edad_al_ing_grupos,
amelia_fit$imputations$imp15$edad_al_ing_grupos,
amelia_fit$imputations$imp16$edad_al_ing_grupos,
amelia_fit$imputations$imp17$edad_al_ing_grupos,
amelia_fit$imputations$imp18$edad_al_ing_grupos,
amelia_fit$imputations$imp19$edad_al_ing_grupos,
amelia_fit$imputations$imp20$edad_al_ing_grupos,
amelia_fit$imputations$imp21$edad_al_ing_grupos,
amelia_fit$imputations$imp22$edad_al_ing_grupos,
amelia_fit$imputations$imp23$edad_al_ing_grupos,
amelia_fit$imputations$imp24$edad_al_ing_grupos,
amelia_fit$imputations$imp25$edad_al_ing_grupos,
amelia_fit$imputations$imp26$edad_al_ing_grupos,
amelia_fit$imputations$imp27$edad_al_ing_grupos,
amelia_fit$imputations$imp28$edad_al_ing_grupos,
amelia_fit$imputations$imp29$edad_al_ing_grupos,
amelia_fit$imputations$imp30$edad_al_ing_grupos,
amelia_fit$imputations$imp31$edad_al_ing_grupos,
amelia_fit$imputations$imp32$edad_al_ing_grupos,
amelia_fit$imputations$imp33$edad_al_ing_grupos,
amelia_fit$imputations$imp34$edad_al_ing_grupos,
amelia_fit$imputations$imp35$edad_al_ing_grupos,
amelia_fit$imputations$imp36$edad_al_ing_grupos,
amelia_fit$imputations$imp37$edad_al_ing_grupos,
amelia_fit$imputations$imp38$edad_al_ing_grupos,
amelia_fit$imputations$imp39$edad_al_ing_grupos,
amelia_fit$imputations$imp40$edad_al_ing_grupos,
amelia_fit$imputations$imp41$edad_al_ing_grupos,
amelia_fit$imputations$imp42$edad_al_ing_grupos,
amelia_fit$imputations$imp43$edad_al_ing_grupos,
amelia_fit$imputations$imp44$edad_al_ing_grupos,
amelia_fit$imputations$imp45$edad_al_ing_grupos,
amelia_fit$imputations$imp46$edad_al_ing_grupos,
amelia_fit$imputations$imp47$edad_al_ing_grupos,
amelia_fit$imputations$imp48$edad_al_ing_grupos,
amelia_fit$imputations$imp49$edad_al_ing_grupos,
amelia_fit$imputations$imp50$edad_al_ing_grupos,
amelia_fit$imputations$imp51$edad_al_ing_grupos,
amelia_fit$imputations$imp52$edad_al_ing_grupos,
amelia_fit$imputations$imp53$edad_al_ing_grupos,
amelia_fit$imputations$imp54$edad_al_ing_grupos,
amelia_fit$imputations$imp55$edad_al_ing_grupos,
amelia_fit$imputations$imp56$edad_al_ing_grupos,
amelia_fit$imputations$imp57$edad_al_ing_grupos,
amelia_fit$imputations$imp58$edad_al_ing_grupos,
amelia_fit$imputations$imp59$edad_al_ing_grupos,
amelia_fit$imputations$imp60$edad_al_ing_grupos,
amelia_fit$imputations$imp61$edad_al_ing_grupos
) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
#18-29 30-39 40-49 50+
janitor::clean_names() %>%
dplyr::arrange(amelia_fit_imputations_imp1_row) %>%
dplyr::ungroup() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::summarise(edad_18_29=sum(value == "<18-29",na.rm=T),
edad_30_39=sum(value == "30-39",na.rm=T),
edad_40_49=sum(value == "40-49",na.rm=T),
edad_50mas=sum(value =="50+",na.rm=T)) %>%
dplyr::ungroup() %>%
#dplyr::mutate(edad_suma = base::rowSums(dplyr::select(is.na(.),starts_with("edad"))))
dplyr::mutate(ties= base::rowSums(dplyr::select(.,starts_with("edad"))>0)) %>%
dplyr::mutate(edad_al_ing_grupos_imp= dplyr::case_when(
(edad_18_29> edad_30_39) & (edad_18_29> edad_40_49) & (edad_18_29> edad_50mas)~"<18-29",
(edad_30_39> edad_18_29) & (edad_30_39> edad_40_49) & (edad_30_39> edad_50mas)~"30-39",
(edad_40_49> edad_18_29) & (edad_40_49> edad_30_39) & (edad_40_49> edad_50mas)~"40-49",
(edad_50mas> edad_18_29) & (edad_50mas> edad_30_39) & (edad_50mas> edad_40_49)~"50+"
))
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
# Reemplazo los valores perdidos:
CONS_C1_df_dup_SEP_2020_women_miss0<-
CONS_C1_df_dup_SEP_2020_women_miss %>%
dplyr::left_join(edad_al_ing_grupos_imputed,by=c("row"="amelia_fit_imputations_imp1_row")) %>%
#si la edad al ingreso no existe, el valor promedio imutado es
dplyr::mutate(edad_al_ing_grupos=dplyr::case_when(is.na(edad_al_ing_grupos)~edad_al_ing_grupos_imp,
T~as.character(edad_al_ing_grupos))) %>%
dplyr::select(-edad_18_29, -edad_30_39, -edad_40_49, -edad_50mas, -ties, -edad_al_ing_grupos_imp)
if(nrow(CONS_C1_df_dup_SEP_2020_women_miss0)-nrow(CONS_C1_df_dup_SEP_2020_women_miss)>0){
warning("AGS: Some rows were added in the imputation")}
After the imputation, there were no missing cases left.
Primary or main substance
Then we imputed the primary/main substance at admission (n= 1).
# Ver distintos valores propuestos para sustancia de inciio
sus_principal_mod_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$sus_principal_mod,
amelia_fit$imputations$imp2$sus_principal_mod,
amelia_fit$imputations$imp3$sus_principal_mod,
amelia_fit$imputations$imp4$sus_principal_mod,
amelia_fit$imputations$imp5$sus_principal_mod,
amelia_fit$imputations$imp6$sus_principal_mod,
amelia_fit$imputations$imp7$sus_principal_mod,
amelia_fit$imputations$imp8$sus_principal_mod,
amelia_fit$imputations$imp9$sus_principal_mod,
amelia_fit$imputations$imp10$sus_principal_mod,
amelia_fit$imputations$imp11$sus_principal_mod,
amelia_fit$imputations$imp12$sus_principal_mod,
amelia_fit$imputations$imp13$sus_principal_mod,
amelia_fit$imputations$imp14$sus_principal_mod,
amelia_fit$imputations$imp15$sus_principal_mod,
amelia_fit$imputations$imp16$sus_principal_mod,
amelia_fit$imputations$imp17$sus_principal_mod,
amelia_fit$imputations$imp18$sus_principal_mod,
amelia_fit$imputations$imp19$sus_principal_mod,
amelia_fit$imputations$imp20$sus_principal_mod,
amelia_fit$imputations$imp21$sus_principal_mod,
amelia_fit$imputations$imp22$sus_principal_mod,
amelia_fit$imputations$imp23$sus_principal_mod,
amelia_fit$imputations$imp24$sus_principal_mod,
amelia_fit$imputations$imp25$sus_principal_mod,
amelia_fit$imputations$imp26$sus_principal_mod,
amelia_fit$imputations$imp27$sus_principal_mod,
amelia_fit$imputations$imp28$sus_principal_mod,
amelia_fit$imputations$imp29$sus_principal_mod,
amelia_fit$imputations$imp30$sus_principal_mod,
amelia_fit$imputations$imp31$sus_principal_mod,
amelia_fit$imputations$imp32$sus_principal_mod,
amelia_fit$imputations$imp33$sus_principal_mod,
amelia_fit$imputations$imp34$sus_principal_mod,
amelia_fit$imputations$imp35$sus_principal_mod,
amelia_fit$imputations$imp36$sus_principal_mod,
amelia_fit$imputations$imp37$sus_principal_mod,
amelia_fit$imputations$imp38$sus_principal_mod,
amelia_fit$imputations$imp39$sus_principal_mod,
amelia_fit$imputations$imp40$sus_principal_mod,
amelia_fit$imputations$imp41$sus_principal_mod,
amelia_fit$imputations$imp42$sus_principal_mod,
amelia_fit$imputations$imp43$sus_principal_mod,
amelia_fit$imputations$imp44$sus_principal_mod,
amelia_fit$imputations$imp45$sus_principal_mod,
amelia_fit$imputations$imp46$sus_principal_mod,
amelia_fit$imputations$imp47$sus_principal_mod,
amelia_fit$imputations$imp48$sus_principal_mod,
amelia_fit$imputations$imp49$sus_principal_mod,
amelia_fit$imputations$imp50$sus_principal_mod,
amelia_fit$imputations$imp51$sus_principal_mod,
amelia_fit$imputations$imp52$sus_principal_mod,
amelia_fit$imputations$imp53$sus_principal_mod,
amelia_fit$imputations$imp54$sus_principal_mod,
amelia_fit$imputations$imp55$sus_principal_mod,
amelia_fit$imputations$imp56$sus_principal_mod,
amelia_fit$imputations$imp57$sus_principal_mod,
amelia_fit$imputations$imp58$sus_principal_mod,
amelia_fit$imputations$imp59$sus_principal_mod,
amelia_fit$imputations$imp60$sus_principal_mod,
amelia_fit$imputations$imp61$sus_principal_mod
) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
#18-29 30-39 40-49 50+
janitor::clean_names() %>%
dplyr::arrange(amelia_fit_imputations_imp1_row) %>%
dplyr::ungroup() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::summarise(sus_prin_mar=sum(value == "Marijuana",na.rm=T),
sus_prin_oh=sum(value == "Alcohol",na.rm=T),
sus_prin_pb=sum(value == "Cocaine paste",na.rm=T),
sus_prin_coc=sum(value =="Cocaine hydrochloride",na.rm=T),
sus_prin_other=sum(value =="Other",na.rm=T)) %>%
dplyr::ungroup() %>%
dplyr::mutate(ties= base::rowSums(dplyr::select(.,starts_with("sus_prin_"))>0)) %>%
dplyr::mutate(sus_principal_mod_imp= dplyr::case_when(
(sus_prin_mar> sus_prin_oh)& (sus_prin_mar> sus_prin_pb)& (sus_prin_mar> sus_prin_coc)& (sus_prin_mar> sus_prin_other)~"Marijuana",
(sus_prin_oh> sus_prin_mar)& (sus_prin_oh> sus_prin_pb)& (sus_prin_oh> sus_prin_coc)& (sus_prin_oh> sus_prin_other)~"Alcohol",
(sus_prin_pb> sus_prin_mar)& (sus_prin_pb> sus_prin_oh)& (sus_prin_pb> sus_prin_coc)& (sus_prin_pb> sus_prin_other)~"Cocaine paste",
(sus_prin_coc> sus_prin_mar)& (sus_prin_coc> sus_prin_oh)& (sus_prin_coc> sus_prin_pb)& (sus_prin_coc> sus_prin_other)~"Cocaine hydrochloride",
(sus_prin_other> sus_prin_mar)& (sus_prin_other> sus_prin_oh)& (sus_prin_other> sus_prin_pb)& (sus_prin_other> sus_prin_coc)~"Cocaine hydrochloride"
))
## `summarise()` ungrouping output (override with `.groups` argument)
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_women_miss1<-
CONS_C1_df_dup_SEP_2020_women_miss0 %>%
dplyr::left_join(sus_principal_mod_imputed, by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(sus_principal_mod=factor(dplyr::case_when(is.na(sus_principal_mod)~as.character(sus_principal_mod_imp),
TRUE~as.character(sus_principal_mod)))) %>%
dplyr::select(-c(sus_prin_mar, sus_prin_oh, sus_prin_pb, sus_prin_coc, sus_prin_other, ties, sus_principal_mod_imp)) %>%
data.table()
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
if(nrow(CONS_C1_df_dup_SEP_2020_women_miss1)-nrow(CONS_C1_df_dup_SEP_2020_women_miss0)>0){
warning("AGS: Some rows were added in the imputation")}
As a result of the imputations, there were no missing values.
Co-occurring SUD
Another variable worth imputing is the presence of additional Substance Use Disorders (n= 1).
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
num_otras_sus_mod_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$num_otras_sus_mod,
amelia_fit$imputations$imp2$num_otras_sus_mod,
amelia_fit$imputations$imp3$num_otras_sus_mod,
amelia_fit$imputations$imp4$num_otras_sus_mod,
amelia_fit$imputations$imp5$num_otras_sus_mod,
amelia_fit$imputations$imp6$num_otras_sus_mod,
amelia_fit$imputations$imp7$num_otras_sus_mod,
amelia_fit$imputations$imp8$num_otras_sus_mod,
amelia_fit$imputations$imp9$num_otras_sus_mod,
amelia_fit$imputations$imp10$num_otras_sus_mod,
amelia_fit$imputations$imp11$num_otras_sus_mod,
amelia_fit$imputations$imp12$num_otras_sus_mod,
amelia_fit$imputations$imp13$num_otras_sus_mod,
amelia_fit$imputations$imp14$num_otras_sus_mod,
amelia_fit$imputations$imp15$num_otras_sus_mod,
amelia_fit$imputations$imp16$num_otras_sus_mod,
amelia_fit$imputations$imp17$num_otras_sus_mod,
amelia_fit$imputations$imp18$num_otras_sus_mod,
amelia_fit$imputations$imp19$num_otras_sus_mod,
amelia_fit$imputations$imp20$num_otras_sus_mod,
amelia_fit$imputations$imp21$num_otras_sus_mod,
amelia_fit$imputations$imp22$num_otras_sus_mod,
amelia_fit$imputations$imp23$num_otras_sus_mod,
amelia_fit$imputations$imp24$num_otras_sus_mod,
amelia_fit$imputations$imp25$num_otras_sus_mod,
amelia_fit$imputations$imp26$num_otras_sus_mod,
amelia_fit$imputations$imp27$num_otras_sus_mod,
amelia_fit$imputations$imp28$num_otras_sus_mod,
amelia_fit$imputations$imp29$num_otras_sus_mod,
amelia_fit$imputations$imp30$num_otras_sus_mod,
amelia_fit$imputations$imp31$num_otras_sus_mod,
amelia_fit$imputations$imp32$num_otras_sus_mod,
amelia_fit$imputations$imp33$num_otras_sus_mod,
amelia_fit$imputations$imp34$num_otras_sus_mod,
amelia_fit$imputations$imp35$num_otras_sus_mod,
amelia_fit$imputations$imp36$num_otras_sus_mod,
amelia_fit$imputations$imp37$num_otras_sus_mod,
amelia_fit$imputations$imp38$num_otras_sus_mod,
amelia_fit$imputations$imp39$num_otras_sus_mod,
amelia_fit$imputations$imp40$num_otras_sus_mod,
amelia_fit$imputations$imp41$num_otras_sus_mod,
amelia_fit$imputations$imp42$num_otras_sus_mod,
amelia_fit$imputations$imp43$num_otras_sus_mod,
amelia_fit$imputations$imp44$num_otras_sus_mod,
amelia_fit$imputations$imp45$num_otras_sus_mod,
amelia_fit$imputations$imp46$num_otras_sus_mod,
amelia_fit$imputations$imp47$num_otras_sus_mod,
amelia_fit$imputations$imp48$num_otras_sus_mod,
amelia_fit$imputations$imp49$num_otras_sus_mod,
amelia_fit$imputations$imp50$num_otras_sus_mod,
amelia_fit$imputations$imp51$num_otras_sus_mod,
amelia_fit$imputations$imp52$num_otras_sus_mod,
amelia_fit$imputations$imp53$num_otras_sus_mod,
amelia_fit$imputations$imp54$num_otras_sus_mod,
amelia_fit$imputations$imp55$num_otras_sus_mod,
amelia_fit$imputations$imp56$num_otras_sus_mod,
amelia_fit$imputations$imp57$num_otras_sus_mod,
amelia_fit$imputations$imp58$num_otras_sus_mod,
amelia_fit$imputations$imp59$num_otras_sus_mod,
amelia_fit$imputations$imp60$num_otras_sus_mod,
amelia_fit$imputations$imp61$num_otras_sus_mod
) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
janitor::clean_names() %>%
dplyr::arrange(amelia_fit_imputations_imp1_row) %>%
dplyr::ungroup() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::summarise(no_ad_subs=sum(value == "No additional substance",na.rm=T),
one_ad_subs=sum(value == "One additional substance",na.rm=T),
more_one_ad_subs=sum(value == "More than one additional substance",na.rm=T)) %>%
dplyr::ungroup() %>%
# Hacer la variable imputada
dplyr::mutate(num_otras_sus_mod_imp= dplyr::case_when(
(no_ad_subs>one_ad_subs)&(no_ad_subs>more_one_ad_subs)~"No additional substance",
(one_ad_subs>no_ad_subs)&(one_ad_subs>more_one_ad_subs)~"One additional substance",
(more_one_ad_subs>no_ad_subs)&(more_one_ad_subs>one_ad_subs)~"More than one additional substance",
T~NA_character_))
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
CONS_C1_df_dup_SEP_2020_women_miss2<-
CONS_C1_df_dup_SEP_2020_women_miss1 %>%
dplyr::left_join(num_otras_sus_mod_imputed[,c("amelia_fit_imputations_imp1_row","num_otras_sus_mod_imp")],
by=c("row"="amelia_fit_imputations_imp1_row")) %>%
#si la edad al ingreso no existe, el valor promedio imutado es
dplyr::mutate(num_otras_sus_mod=
dplyr::case_when(is.na(num_otras_sus_mod)~num_otras_sus_mod_imp,
T~as.character(num_otras_sus_mod))) %>%
dplyr::select(-num_otras_sus_mod_imp)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#is.na(edad_ini_cons) & is.na(edad_ini_sus_prin) & is.na(min_edad_al_ing)~as.numeric(avg),
#table(is.na(CONS_C1_df_dup_SEP_2020_women_miss1$edad_ini_cons))
paste0("Number of rows with values that did not fulfilled the conditions: ",CONS_C1_df_dup_SEP_2020_women_miss2 %>% dplyr::filter(is.na(num_otras_sus_mod)) %>%
dplyr::select(hash_key, edad_al_ing_grupos,num_otras_sus_mod) %>% nrow())
## [1] "Number of rows with values that did not fulfilled the conditions: 0"
#Lo importante es tener en cuenta que las imputaciones se hicieron por filas; no, en cambio, ahora debemos reemplazar aquellos casos que tienen perdidos (no cumplieron con las condiciones) con el valor mínimo
if(nrow(CONS_C1_df_dup_SEP_2020_women_miss2)-nrow(CONS_C1_df_dup_SEP_2020_women_miss1)>0){
warning("AGS: Some rows were added in the imputation")}
As a result of the imputations, there were 0 values of co-occurring SUDs available.
Frequency of Use of the Primary Substance at Admission
Another variable that is worth imputing is the Frequency of use of primary drug at admission (n= 116). In case of ties, we selected the imputed values with the value with the most frequent drug use.
# Ver distintos valores propuestos para sustancia de inciio
freq_cons_sus_prin_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$freq_cons_sus_prin,
amelia_fit$imputations$imp2$freq_cons_sus_prin,
amelia_fit$imputations$imp3$freq_cons_sus_prin,
amelia_fit$imputations$imp4$freq_cons_sus_prin,
amelia_fit$imputations$imp5$freq_cons_sus_prin,
amelia_fit$imputations$imp6$freq_cons_sus_prin,
amelia_fit$imputations$imp7$freq_cons_sus_prin,
amelia_fit$imputations$imp8$freq_cons_sus_prin,
amelia_fit$imputations$imp9$freq_cons_sus_prin,
amelia_fit$imputations$imp10$freq_cons_sus_prin,
amelia_fit$imputations$imp11$freq_cons_sus_prin,
amelia_fit$imputations$imp12$freq_cons_sus_prin,
amelia_fit$imputations$imp13$freq_cons_sus_prin,
amelia_fit$imputations$imp14$freq_cons_sus_prin,
amelia_fit$imputations$imp15$freq_cons_sus_prin,
amelia_fit$imputations$imp16$freq_cons_sus_prin,
amelia_fit$imputations$imp17$freq_cons_sus_prin,
amelia_fit$imputations$imp18$freq_cons_sus_prin,
amelia_fit$imputations$imp19$freq_cons_sus_prin,
amelia_fit$imputations$imp20$freq_cons_sus_prin,
amelia_fit$imputations$imp21$freq_cons_sus_prin,
amelia_fit$imputations$imp22$freq_cons_sus_prin,
amelia_fit$imputations$imp23$freq_cons_sus_prin,
amelia_fit$imputations$imp24$freq_cons_sus_prin,
amelia_fit$imputations$imp25$freq_cons_sus_prin,
amelia_fit$imputations$imp26$freq_cons_sus_prin,
amelia_fit$imputations$imp27$freq_cons_sus_prin,
amelia_fit$imputations$imp28$freq_cons_sus_prin,
amelia_fit$imputations$imp29$freq_cons_sus_prin,
amelia_fit$imputations$imp30$freq_cons_sus_prin,
amelia_fit$imputations$imp31$freq_cons_sus_prin,
amelia_fit$imputations$imp32$freq_cons_sus_prin,
amelia_fit$imputations$imp33$freq_cons_sus_prin,
amelia_fit$imputations$imp34$freq_cons_sus_prin,
amelia_fit$imputations$imp35$freq_cons_sus_prin,
amelia_fit$imputations$imp36$freq_cons_sus_prin,
amelia_fit$imputations$imp37$freq_cons_sus_prin,
amelia_fit$imputations$imp38$freq_cons_sus_prin,
amelia_fit$imputations$imp39$freq_cons_sus_prin,
amelia_fit$imputations$imp40$freq_cons_sus_prin,
amelia_fit$imputations$imp41$freq_cons_sus_prin,
amelia_fit$imputations$imp42$freq_cons_sus_prin,
amelia_fit$imputations$imp43$freq_cons_sus_prin,
amelia_fit$imputations$imp44$freq_cons_sus_prin,
amelia_fit$imputations$imp45$freq_cons_sus_prin,
amelia_fit$imputations$imp46$freq_cons_sus_prin,
amelia_fit$imputations$imp47$freq_cons_sus_prin,
amelia_fit$imputations$imp48$freq_cons_sus_prin,
amelia_fit$imputations$imp49$freq_cons_sus_prin,
amelia_fit$imputations$imp50$freq_cons_sus_prin,
amelia_fit$imputations$imp51$freq_cons_sus_prin,
amelia_fit$imputations$imp52$freq_cons_sus_prin,
amelia_fit$imputations$imp53$freq_cons_sus_prin,
amelia_fit$imputations$imp54$freq_cons_sus_prin,
amelia_fit$imputations$imp55$freq_cons_sus_prin,
amelia_fit$imputations$imp56$freq_cons_sus_prin,
amelia_fit$imputations$imp57$freq_cons_sus_prin,
amelia_fit$imputations$imp58$freq_cons_sus_prin,
amelia_fit$imputations$imp59$freq_cons_sus_prin,
amelia_fit$imputations$imp60$freq_cons_sus_prin,
amelia_fit$imputations$imp61$freq_cons_sus_prin
)
freq_cons_sus_prin_imputed<-
freq_cons_sus_prin_imputed %>%
data.frame() %>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("1 day a week or more",as.character(.))~1,TRUE~0), .names="1_day_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("2 to 3 days a week",as.character(.))~1,TRUE~0), .names="2_3_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("4 to 6 days a week",as.character(.))~1,TRUE~0), .names="4_6_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("Less than 1 day a week",as.character(.))~1,TRUE~0), .names="less_1_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("Did not use",as.character(.))~1,TRUE~0), .names="did_not_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("Daily",as.character(.))~1,TRUE~0), .names="daily_{col}"))%>%
dplyr::mutate(freq_cons_sus_prin_daily = base::rowSums(dplyr::select(., starts_with("daily_")))) %>%
dplyr::mutate(freq_cons_sus_prin_4_6 = base::rowSums(dplyr::select(., starts_with("4_6_"))))%>%
dplyr::mutate(freq_cons_sus_prin_2_3 = base::rowSums(dplyr::select(., starts_with("2_3_"))))%>%
dplyr::mutate(freq_cons_sus_prin_1_day = base::rowSums(dplyr::select(., starts_with("1_day_"))))%>%
dplyr::mutate(freq_cons_sus_prin_less_1 = base::rowSums(dplyr::select(., starts_with("less_1_"))))%>%
dplyr::mutate(freq_cons_sus_prin_did_not = base::rowSums(dplyr::select(., starts_with("did_not_")))) %>%
#dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_1_day>0~1,TRUE~0)) %>%
dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_2_3>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>%
dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_4_6>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>%
dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_less_1>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>%
dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_did_not>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>%
dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_daily>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>%
#hierarchy
dplyr::mutate(freq_cons_sus_prin_to_imputation=
dplyr::case_when(freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_daily>0~"Daily",
freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_4_6>0~"4 to 6 days a week",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_2_3>0~"2 to 3 days a week",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_1_day>0~"1 day a week or more",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_less_1>0~"Less than 1 day a week",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_did_not>0~"Did not use",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_daily>0~"Daily",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_4_6>0~"4 to 6 days a week",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_2_3>0~"2 to 3 days a week",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_1_day>0~"1 day a week or more",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_less_1>0~"Less than 1 day a week",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_did_not>0~"Did not use")) %>%
janitor::clean_names()
freq_cons_sus_prin_imputed<-
dplyr::select(freq_cons_sus_prin_imputed,amelia_fit_imputations_imp1_row,freq_cons_sus_prin_to_imputation)
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_women_miss3<-
CONS_C1_df_dup_SEP_2020_women_miss2 %>%
dplyr::left_join(freq_cons_sus_prin_imputed, by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(freq_cons_sus_prin=factor(dplyr::case_when(is.na(freq_cons_sus_prin)~as.character(freq_cons_sus_prin_to_imputation), TRUE~as.character(freq_cons_sus_prin)))) %>%
dplyr::select(-freq_cons_sus_prin_to_imputation) %>%
data.table()
if(nrow(CONS_C1_df_dup_SEP_2020_women_miss3)-nrow(CONS_C1_df_dup_SEP_2020_women_miss2)>0){
warning("AGS: Some rows were added in the imputation")}
As a result of the imputations, there were no missing values.
Educational Attainment
Another variable that is worth imputing is the Educational Attainment (n= 115). In case of ties,w imputed for the most vulnerable category among the candidates.
# Ver distintos valores propuestos para sustancia de inciio
escolaridad_rec_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$escolaridad_rec,
amelia_fit$imputations$imp2$escolaridad_rec,
amelia_fit$imputations$imp3$escolaridad_rec,
amelia_fit$imputations$imp4$escolaridad_rec,
amelia_fit$imputations$imp5$escolaridad_rec,
amelia_fit$imputations$imp6$escolaridad_rec,
amelia_fit$imputations$imp7$escolaridad_rec,
amelia_fit$imputations$imp8$escolaridad_rec,
amelia_fit$imputations$imp9$escolaridad_rec,
amelia_fit$imputations$imp10$escolaridad_rec,
amelia_fit$imputations$imp11$escolaridad_rec,
amelia_fit$imputations$imp12$escolaridad_rec,
amelia_fit$imputations$imp13$escolaridad_rec,
amelia_fit$imputations$imp14$escolaridad_rec,
amelia_fit$imputations$imp15$escolaridad_rec,
amelia_fit$imputations$imp16$escolaridad_rec,
amelia_fit$imputations$imp17$escolaridad_rec,
amelia_fit$imputations$imp18$escolaridad_rec,
amelia_fit$imputations$imp19$escolaridad_rec,
amelia_fit$imputations$imp20$escolaridad_rec,
amelia_fit$imputations$imp21$escolaridad_rec,
amelia_fit$imputations$imp22$escolaridad_rec,
amelia_fit$imputations$imp23$escolaridad_rec,
amelia_fit$imputations$imp24$escolaridad_rec,
amelia_fit$imputations$imp25$escolaridad_rec,
amelia_fit$imputations$imp26$escolaridad_rec,
amelia_fit$imputations$imp27$escolaridad_rec,
amelia_fit$imputations$imp28$escolaridad_rec,
amelia_fit$imputations$imp29$escolaridad_rec,
amelia_fit$imputations$imp30$escolaridad_rec,
amelia_fit$imputations$imp31$escolaridad_rec,
amelia_fit$imputations$imp32$escolaridad_rec,
amelia_fit$imputations$imp33$escolaridad_rec,
amelia_fit$imputations$imp34$escolaridad_rec,
amelia_fit$imputations$imp35$escolaridad_rec,
amelia_fit$imputations$imp36$escolaridad_rec,
amelia_fit$imputations$imp37$escolaridad_rec,
amelia_fit$imputations$imp38$escolaridad_rec,
amelia_fit$imputations$imp39$escolaridad_rec,
amelia_fit$imputations$imp40$escolaridad_rec,
amelia_fit$imputations$imp41$escolaridad_rec,
amelia_fit$imputations$imp42$escolaridad_rec,
amelia_fit$imputations$imp43$escolaridad_rec,
amelia_fit$imputations$imp44$escolaridad_rec,
amelia_fit$imputations$imp45$escolaridad_rec,
amelia_fit$imputations$imp46$escolaridad_rec,
amelia_fit$imputations$imp47$escolaridad_rec,
amelia_fit$imputations$imp48$escolaridad_rec,
amelia_fit$imputations$imp49$escolaridad_rec,
amelia_fit$imputations$imp50$escolaridad_rec,
amelia_fit$imputations$imp51$escolaridad_rec,
amelia_fit$imputations$imp52$escolaridad_rec,
amelia_fit$imputations$imp53$escolaridad_rec,
amelia_fit$imputations$imp54$escolaridad_rec,
amelia_fit$imputations$imp55$escolaridad_rec,
amelia_fit$imputations$imp56$escolaridad_rec,
amelia_fit$imputations$imp57$escolaridad_rec,
amelia_fit$imputations$imp58$escolaridad_rec,
amelia_fit$imputations$imp59$escolaridad_rec,
amelia_fit$imputations$imp60$escolaridad_rec) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
janitor::clean_names() %>%
dplyr::arrange(amelia_fit_imputations_imp1_row) %>%
dplyr::ungroup() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
# 1-Mild 2-Moderate 3-Severe
dplyr::summarise(primary_3=sum(value == "3-Completed primary school or less",na.rm=T),
high_sc_2=sum(value == "2-Completed high school or less",na.rm=T),
more_h_sc_1=sum(value =="1-More than high school",na.rm=T)) %>%
dplyr::ungroup() %>%
dplyr::mutate(escolaridad_rec_imp= dplyr::case_when(
(more_h_sc_1>primary_3) & (more_h_sc_1>high_sc_2)~"1-More than high school",
(high_sc_2>primary_3) & (high_sc_2>more_h_sc_1)~"2-Completed high school or less",
(primary_3>more_h_sc_1) & (primary_3>high_sc_2)~"3-Completed primary school or less"
)) %>%
#2) Resolve ties
dplyr::mutate(ties= dplyr::case_when(is.na(escolaridad_rec_imp)~1,T~0)) %>%
dplyr::mutate(escolaridad_rec_imp= dplyr::case_when(ties==1 & ((primary_3>more_h_sc_1)|(primary_3>high_sc_2))~"3-Completed primary school or less", ties==1 & ((high_sc_2>primary_3)|(high_sc_2>more_h_sc_1))~"2-Completed high school or less",
T~escolaridad_rec_imp))
## `summarise()` ungrouping output (override with `.groups` argument)
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
#CONS_C1_df_dup_SEP_2020 %>% janitor::tabyl(motivodeegreso_mod_imp,evaluacindelprocesoteraputico)
CONS_C1_df_dup_SEP_2020_women_miss4<-
CONS_C1_df_dup_SEP_2020_women_miss3 %>%
dplyr::left_join(escolaridad_rec_imputed[,c("amelia_fit_imputations_imp1_row","escolaridad_rec_imp")], by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(escolaridad_rec=factor(dplyr::case_when(is.na(escolaridad_rec)~ escolaridad_rec_imp,
TRUE~as.character(escolaridad_rec)))) %>%
dplyr::mutate(escolaridad_rec=parse_factor(as.character(escolaridad_rec),levels=c("1-More than high school", "2-Completed high school or less","3-Completed primary school or less"), ordered =T,trim_ws=T,include_na =F, locale=locale(encoding = "UTF-8"))) %>%
dplyr::select(-escolaridad_rec_imp) %>%
data.table()
if(nrow(CONS_C1_df_dup_SEP_2020_women_miss4)-nrow(CONS_C1_df_dup_SEP_2020_women_miss3)>0){
warning("AGS: Some rows were added in the imputation")}
We ended having 0 missing values in educational attainment.
Marital status
Additionally, we replaced missing values of the marital status (n=34). Since different marital status were not clearly more vulnerable between each other, we selected the most frequent imputed value among the different imputed databases. Only in case of ties in the candidate values, we resolved them by discarding “married” status, which could be somehow less vulnerable than other categories.
# Ver distintos valores propuestos para estado conyugal
estado_conyugal_2_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$estado_conyugal_2,
amelia_fit$imputations$imp2$estado_conyugal_2,
amelia_fit$imputations$imp3$estado_conyugal_2,
amelia_fit$imputations$imp4$estado_conyugal_2,
amelia_fit$imputations$imp5$estado_conyugal_2,
amelia_fit$imputations$imp6$estado_conyugal_2,
amelia_fit$imputations$imp7$estado_conyugal_2,
amelia_fit$imputations$imp8$estado_conyugal_2,
amelia_fit$imputations$imp9$estado_conyugal_2,
amelia_fit$imputations$imp10$estado_conyugal_2,
amelia_fit$imputations$imp11$estado_conyugal_2,
amelia_fit$imputations$imp12$estado_conyugal_2,
amelia_fit$imputations$imp13$estado_conyugal_2,
amelia_fit$imputations$imp14$estado_conyugal_2,
amelia_fit$imputations$imp15$estado_conyugal_2,
amelia_fit$imputations$imp16$estado_conyugal_2,
amelia_fit$imputations$imp17$estado_conyugal_2,
amelia_fit$imputations$imp18$estado_conyugal_2,
amelia_fit$imputations$imp19$estado_conyugal_2,
amelia_fit$imputations$imp20$estado_conyugal_2,
amelia_fit$imputations$imp21$estado_conyugal_2,
amelia_fit$imputations$imp22$estado_conyugal_2,
amelia_fit$imputations$imp23$estado_conyugal_2,
amelia_fit$imputations$imp24$estado_conyugal_2,
amelia_fit$imputations$imp25$estado_conyugal_2,
amelia_fit$imputations$imp26$estado_conyugal_2,
amelia_fit$imputations$imp27$estado_conyugal_2,
amelia_fit$imputations$imp28$estado_conyugal_2,
amelia_fit$imputations$imp29$estado_conyugal_2,
amelia_fit$imputations$imp30$estado_conyugal_2,
amelia_fit$imputations$imp31$estado_conyugal_2,
amelia_fit$imputations$imp32$estado_conyugal_2,
amelia_fit$imputations$imp33$estado_conyugal_2,
amelia_fit$imputations$imp34$estado_conyugal_2,
amelia_fit$imputations$imp35$estado_conyugal_2,
amelia_fit$imputations$imp36$estado_conyugal_2,
amelia_fit$imputations$imp37$estado_conyugal_2,
amelia_fit$imputations$imp38$estado_conyugal_2,
amelia_fit$imputations$imp39$estado_conyugal_2,
amelia_fit$imputations$imp40$estado_conyugal_2,
amelia_fit$imputations$imp41$estado_conyugal_2,
amelia_fit$imputations$imp42$estado_conyugal_2,
amelia_fit$imputations$imp43$estado_conyugal_2,
amelia_fit$imputations$imp44$estado_conyugal_2,
amelia_fit$imputations$imp45$estado_conyugal_2,
amelia_fit$imputations$imp46$estado_conyugal_2,
amelia_fit$imputations$imp47$estado_conyugal_2,
amelia_fit$imputations$imp48$estado_conyugal_2,
amelia_fit$imputations$imp49$estado_conyugal_2,
amelia_fit$imputations$imp50$estado_conyugal_2,
amelia_fit$imputations$imp51$estado_conyugal_2,
amelia_fit$imputations$imp52$estado_conyugal_2,
amelia_fit$imputations$imp53$estado_conyugal_2,
amelia_fit$imputations$imp54$estado_conyugal_2,
amelia_fit$imputations$imp55$estado_conyugal_2,
amelia_fit$imputations$imp56$estado_conyugal_2,
amelia_fit$imputations$imp57$estado_conyugal_2,
amelia_fit$imputations$imp58$estado_conyugal_2,
amelia_fit$imputations$imp59$estado_conyugal_2,
amelia_fit$imputations$imp60$estado_conyugal_2,
amelia_fit$imputations$imp61$estado_conyugal_2
)
estado_conyugal_2_imputed<-
estado_conyugal_2_imputed %>%
data.frame() %>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Married/Shared living arrangements",as.character(.))~1,TRUE~0), .names="married_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Separated/Divorced",as.character(.))~1,TRUE~0), .names="sep_div_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Single",as.character(.))~1,TRUE~0), .names="singl_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Widower",as.character(.))~1,TRUE~0), .names="widow_{col}"))%>%
dplyr::mutate(estado_conyugal_2_married = base::rowSums(dplyr::select(., starts_with("married_"))))%>%
dplyr::mutate(estado_conyugal_2_sep_div = base::rowSums(dplyr::select(., starts_with("sep_div_"))))%>%
dplyr::mutate(estado_conyugal_2_singl = base::rowSums(dplyr::select(., starts_with("singl_"))))%>%
dplyr::mutate(estado_conyugal_2_wid = base::rowSums(dplyr::select(., starts_with("widow_"))))%>%
#dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_married>0~1,TRUE~0)) %>%
dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_sep_div>0~estado_conyugal_2_tot+1,TRUE~estado_conyugal_2_tot)) %>%
dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_singl>0~estado_conyugal_2_tot+1,TRUE~estado_conyugal_2_tot)) %>%
dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_wid>0~estado_conyugal_2_tot+1,TRUE~estado_conyugal_2_tot)) %>%
janitor::clean_names()
estado_conyugal_2_imputed_cat_est_cony<-
estado_conyugal_2_imputed %>%
tidyr::pivot_longer(c(estado_conyugal_2_married, estado_conyugal_2_sep_div, estado_conyugal_2_singl, estado_conyugal_2_wid), names_to = "cat_est_conyugal", values_to = "count") %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::mutate(estado_conyugal_2_imputed_max=max(count,na.rm=T)) %>%
dplyr::ungroup() %>%
dplyr::filter(estado_conyugal_2_imputed_max==count) %>%
dplyr::select(amelia_fit_imputations_imp1_row,cat_est_conyugal,count) %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::mutate(n_row=n()) %>%
dplyr::ungroup() %>%
dplyr::mutate(cat_est_conyugal=dplyr::case_when(n_row>1~NA_character_,
TRUE~cat_est_conyugal)) %>%
dplyr::distinct(amelia_fit_imputations_imp1_row,.keep_all = T)
estado_conyugal_2_imputed<-
estado_conyugal_2_imputed %>%
dplyr::left_join(estado_conyugal_2_imputed_cat_est_cony, by="amelia_fit_imputations_imp1_row") %>%
dplyr::mutate(cat_est_conyugal=dplyr::case_when(cat_est_conyugal=="estado_conyugal_2_married"~"Married/Shared living arrangements",cat_est_conyugal=="estado_conyugal_2_sep_div"~"Separated/Divorced",cat_est_conyugal=="estado_conyugal_2_singl"~"Single",cat_est_conyugal=="estado_conyugal_2_wid"~"Widower"
))%>%
janitor::clean_names()
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_women_miss5_prev<-
CONS_C1_df_dup_SEP_2020_women_miss4 %>%
dplyr::left_join(dplyr::select(estado_conyugal_2_imputed,amelia_fit_imputations_imp1_row,cat_est_conyugal), by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(estado_conyugal_2=factor(dplyr::case_when(is.na(estado_conyugal_2)~as.character(cat_est_conyugal),TRUE~as.character(estado_conyugal_2)))) %>%
dplyr::select(-cat_est_conyugal) %>%
data.table()
# casos problemáticos de matrimonio c(59664, 17582, 161721, 36520)
no_calzaron_estado_cony<-
CONS_C1_df_dup_SEP_2020_women_miss5_prev %>% dplyr::filter(is.na(estado_conyugal_2)) %>% dplyr::distinct(row) %>% unlist()
estado_conyugal_2_imputed2<-
estado_conyugal_2_imputed %>%
dplyr::filter(amelia_fit_imputations_imp1_row %in% no_calzaron_estado_cony) %>%
dplyr::select(amelia_fit_imputations_imp1_row, estado_conyugal_2_married, estado_conyugal_2_sep_div,estado_conyugal_2_singl, estado_conyugal_2_wid, estado_conyugal_2_tot, cat_est_conyugal) %>%
melt(id.vars="amelia_fit_imputations_imp1_row") %>%
dplyr::mutate(value=as.numeric(value)) %>%
dplyr::arrange(amelia_fit_imputations_imp1_row) %>%
dplyr::filter(value!="cat_est_conyugal") %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
slice_max(value, with_ties = T) %>%
dplyr::filter(variable!="estado_conyugal_2_married") %>%
dplyr::left_join(CONS_C1_df_dup_SEP_2020_women[,c("row","edad_al_ing")], by=c("amelia_fit_imputations_imp1_row"="row")) %>%
dplyr::mutate(value=dplyr::case_when(variable=="estado_conyugal_2_sep_div"~value*10,
T~value)) %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
slice_max(value, with_ties = T) %>%
dplyr::ungroup() %>%
dplyr::mutate(marital_status_imp=dplyr::case_when(grepl("_singl",variable)~"Single",
grepl("_sep_div",variable)~"Separated/Divorced",
grepl("_married",variable)~"Married/Shared living arrangements",
grepl("_wid",variable)~"Widower"
))
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#2nd round of imputation for ties
CONS_C1_df_dup_SEP_2020_women_miss5<-
CONS_C1_df_dup_SEP_2020_women_miss5_prev %>%
dplyr::left_join(dplyr::select(estado_conyugal_2_imputed2,amelia_fit_imputations_imp1_row,marital_status_imp), by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(estado_conyugal_2=factor(dplyr::case_when(is.na(estado_conyugal_2)~as.character(marital_status_imp),TRUE~as.character(estado_conyugal_2)))) %>%
dplyr::select(-marital_status_imp) %>%
data.table()
#CONS_C1_df_dup_SEP_2020_women_miss5 %>%
#dplyr::filter(hash_key %in% CONS_C1_df_dup_SEP_2020_women_miss5 %>% dplyr::filter(is.na(estado_conyugal_2)) %>% dplyr::distinct(hash_key) %>% unlist())
if(nrow(CONS_C1_df_dup_SEP_2020_women_miss5)-nrow(CONS_C1_df_dup_SEP_2020_women_miss4)>0){
warning("AGS: Some rows were added in the imputation")}
We could not resolve Marital status in 0 cases due to ties in the most frequent values.
Cause of Discharge
We looked over possible imputations to the truly missing values, discarding missing values due to censorship (n=3). In case of ties, we replace with the more vulnerable value.
motivo_de_egreso_a_imputar<-
CONS_C1_df_dup_SEP_2020_women_miss %>% dplyr::filter(is.na(motivodeegreso_mod_imp)) %>% dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020,row,fech_egres_imp)) %>% dplyr::filter(!is.na(fech_egres_imp))%>%dplyr::select(row)
## Joining, by = "row"
#CONS_C1_df_dup_SEP_2020 %>% dplyr::filter(is.na(motivodeegreso_mod_imp)) %>%
# dplyr::select(row, hash_key, motivodeegreso_mod_imp, fech_egres_imp)
# dplyr::filter(fech_egres_imp=="2019-11-13")
motivodeegreso_mod_imp_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$motivodeegreso_mod_imp,
amelia_fit$imputations$imp2$motivodeegreso_mod_imp,
amelia_fit$imputations$imp3$motivodeegreso_mod_imp,
amelia_fit$imputations$imp4$motivodeegreso_mod_imp,
amelia_fit$imputations$imp5$motivodeegreso_mod_imp,
amelia_fit$imputations$imp6$motivodeegreso_mod_imp,
amelia_fit$imputations$imp7$motivodeegreso_mod_imp,
amelia_fit$imputations$imp8$motivodeegreso_mod_imp,
amelia_fit$imputations$imp9$motivodeegreso_mod_imp,
amelia_fit$imputations$imp10$motivodeegreso_mod_imp,
amelia_fit$imputations$imp11$motivodeegreso_mod_imp,
amelia_fit$imputations$imp12$motivodeegreso_mod_imp,
amelia_fit$imputations$imp13$motivodeegreso_mod_imp,
amelia_fit$imputations$imp14$motivodeegreso_mod_imp,
amelia_fit$imputations$imp15$motivodeegreso_mod_imp,
amelia_fit$imputations$imp16$motivodeegreso_mod_imp,
amelia_fit$imputations$imp17$motivodeegreso_mod_imp,
amelia_fit$imputations$imp18$motivodeegreso_mod_imp,
amelia_fit$imputations$imp19$motivodeegreso_mod_imp,
amelia_fit$imputations$imp20$motivodeegreso_mod_imp,
amelia_fit$imputations$imp21$motivodeegreso_mod_imp,
amelia_fit$imputations$imp22$motivodeegreso_mod_imp,
amelia_fit$imputations$imp23$motivodeegreso_mod_imp,
amelia_fit$imputations$imp24$motivodeegreso_mod_imp,
amelia_fit$imputations$imp25$motivodeegreso_mod_imp,
amelia_fit$imputations$imp26$motivodeegreso_mod_imp,
amelia_fit$imputations$imp27$motivodeegreso_mod_imp,
amelia_fit$imputations$imp28$motivodeegreso_mod_imp,
amelia_fit$imputations$imp29$motivodeegreso_mod_imp,
amelia_fit$imputations$imp30$motivodeegreso_mod_imp,
amelia_fit$imputations$imp31$motivodeegreso_mod_imp,
amelia_fit$imputations$imp32$motivodeegreso_mod_imp,
amelia_fit$imputations$imp33$motivodeegreso_mod_imp,
amelia_fit$imputations$imp34$motivodeegreso_mod_imp,
amelia_fit$imputations$imp35$motivodeegreso_mod_imp,
amelia_fit$imputations$imp36$motivodeegreso_mod_imp,
amelia_fit$imputations$imp37$motivodeegreso_mod_imp,
amelia_fit$imputations$imp38$motivodeegreso_mod_imp,
amelia_fit$imputations$imp39$motivodeegreso_mod_imp,
amelia_fit$imputations$imp40$motivodeegreso_mod_imp,
amelia_fit$imputations$imp41$motivodeegreso_mod_imp,
amelia_fit$imputations$imp42$motivodeegreso_mod_imp,
amelia_fit$imputations$imp43$motivodeegreso_mod_imp,
amelia_fit$imputations$imp44$motivodeegreso_mod_imp,
amelia_fit$imputations$imp45$motivodeegreso_mod_imp,
amelia_fit$imputations$imp46$motivodeegreso_mod_imp,
amelia_fit$imputations$imp47$motivodeegreso_mod_imp,
amelia_fit$imputations$imp48$motivodeegreso_mod_imp,
amelia_fit$imputations$imp49$motivodeegreso_mod_imp,
amelia_fit$imputations$imp50$motivodeegreso_mod_imp,
amelia_fit$imputations$imp51$motivodeegreso_mod_imp,
amelia_fit$imputations$imp52$motivodeegreso_mod_imp,
amelia_fit$imputations$imp53$motivodeegreso_mod_imp,
amelia_fit$imputations$imp54$motivodeegreso_mod_imp,
amelia_fit$imputations$imp55$motivodeegreso_mod_imp,
amelia_fit$imputations$imp56$motivodeegreso_mod_imp,
amelia_fit$imputations$imp57$motivodeegreso_mod_imp,
amelia_fit$imputations$imp58$motivodeegreso_mod_imp,
amelia_fit$imputations$imp59$motivodeegreso_mod_imp,
amelia_fit$imputations$imp60$motivodeegreso_mod_imp,
amelia_fit$imputations$imp61$motivodeegreso_mod_imp
) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
janitor::clean_names() %>%
dplyr::arrange(amelia_fit_imputations_imp1_row) %>%
dplyr::ungroup() %>%
dplyr::filter(amelia_fit_imputations_imp1_row %in% unlist(motivo_de_egreso_a_imputar$row)) %>%
#FILTRAR CASOS QUE SON ILÓGICOS: MUERTES CON TRATAMIENTOS POSTERIORES (1)
dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020,row,motivodeegreso_mod_imp, fech_egres_imp,dup, duplicates_filtered,evaluacindelprocesoteraputico,fech_ing_next_treat),by=c("amelia_fit_imputations_imp1_row"="row")) %>%
dplyr::mutate(value_death=dplyr::case_when(value=="Death"& !is.na(fech_ing_next_treat)~1,T~0)) %>%
dplyr::filter(value_death!=1) %>%
#FILTRAR CASOS QUE SON ILÓGICOS: NO PUEDEN HABER TRATAMIENTOS EN CURSO CON TRATAMIENTOS POSTERIORES (2)
dplyr::mutate(value_fail=dplyr::case_when(value=="Ongoing treatment"& !is.na(fech_ing_next_treat)~1,T~0)) %>%
dplyr::filter(value_fail!=1) %>%
#FILTRAR CASOS QUE SON ILÓGICOS: NO PUEDE HABER OTRA COSA QUE TRATAMIENTO EN CURSO CON FECHA DE CENSURA
dplyr::mutate(value_ong=dplyr::case_when(value!="Ongoing treatment" & fech_egres_imp=="2019-11-13"~1,T~0)) %>%
dplyr::filter(value_ong!=1) %>%
#:#:#:#:#:
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::summarise(adm_dis=sum(value == "Administrative discharge",na.rm=T),
death=sum(value == "Death",na.rm=T),
referral=sum(value == "Referral to another treatment",na.rm=T),
ther_dis=sum(value == "Therapeutic discharge",na.rm=T),
on_treat=sum(value == "Ongoing treatment",na.rm=T),
dropout=sum(value =="Drop-out",na.rm=T)) %>%
melt(id.vars="amelia_fit_imputations_imp1_row") %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::slice_max(value) %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::mutate(n=n()) %>%
dplyr::mutate(emp=dplyr::case_when(variable=="adm_dis" & n>1~1,T~0)) %>%
dplyr::filter(emp!=1) %>%
dplyr::mutate(motivodeegreso_mod_imp_imputation=
dplyr::case_when(variable=="adm_dis"~"Administrative discharge",
variable=="death"~"Death",
variable=="ther_dis"~"Therapeutic discharge",
variable=="on_treat"~"Ongoing treatment",
variable=="referral"~"Referral to another treatment",
variable=="dropout"~"Drop-out"))
## `summarise()` ungrouping output (override with `.groups` argument)
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:
CONS_C1_df_dup_SEP_2020_women_miss6<-
CONS_C1_df_dup_SEP_2020_women_miss5 %>%
dplyr::left_join(motivodeegreso_mod_imp_imputed[,c("amelia_fit_imputations_imp1_row","motivodeegreso_mod_imp_imputation")], by=c("row"="amelia_fit_imputations_imp1_row")) %>%
#dplyr::filter(is.na(motivodeegreso_mod_imp)) %>% dplyr::select(row,hash_key,motivodeegreso_mod_imp_original, motivodeegreso_mod_imp_imputation,motivodeegreso_mod_imp,fech_egres_num,fech_egres_imp)
dplyr::left_join(cbind.data.frame(motivo_de_egreso_a_imputar,value_to_impute=1),"row") %>%
dplyr::mutate(motivodeegreso_mod_imp=factor(
dplyr::case_when(is.na(motivodeegreso_mod_imp) & value_to_impute==1~motivodeegreso_mod_imp_imputation,
T~as.character(motivodeegreso_mod_imp)))) %>%
dplyr::select(-motivodeegreso_mod_imp_imputation,-value_to_impute) %>%
data.table()
#CONS_C1_df_dup_SEP_2020_women_miss9 %>% janitor::tabyl(motivodeegreso_mod_imp,motivodeegreso_mod_imp_original)
#CONS_C1_df_dup_SEP_2020_women_miss9 %>% janitor::tabyl(motivodeegreso_mod_imp_original)
CONS_C1_df_dup_SEP_2020_women_miss6 %>% janitor::tabyl(motivodeegreso_mod_imp) %>%
dplyr::mutate(percent=scales::percent(percent)) %>%
knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
caption = paste0("Table 2. Imputed Cause of Discharge vs. Original Cause of Discharge"),
#col.names = c("Cause of Discharge","1-High Achievement", "2- Medium Achievement","3- Minimum Achievement","Null Values"),
align =rep('c', 101)) %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 12) %>%
kableExtra::add_footnote("Note. NA= Null values", notation="none") %>%
kableExtra::scroll_box(width = "100%", height = "375px")
Table 2. Imputed Cause of Discharge vs. Original Cause of Discharge
|
motivodeegreso_mod_imp
|
n
|
percent
|
|
Administrative discharge
|
1,745
|
8.15%
|
|
Early Drop-out
|
3,165
|
14.77%
|
|
Late Drop-out
|
7,108
|
33.18%
|
|
Ongoing treatment
|
1,601
|
7.47%
|
|
Referral to another treatment
|
2,676
|
12.49%
|
|
Therapeutic discharge
|
5,128
|
23.94%
|
|
Note. NA= Null values
|
#
if(nrow(CONS_C1_df_dup_SEP_2020_women_miss6)-nrow(CONS_C1_df_dup_SEP_2020_women_miss5)>0){
warning("AGS: Some rows were added in the imputation")}
As a result of the imputations, there were no missing values.
Biopsychosocial involvement
Another variable that is worth imputing is the Biopsychosocial involvement (n= 371). In case of ties, we selected the imputed values with the value with the minimum involvement. In case of ties, we chose the most vulnerable value.
# Ver distintos valores propuestos para sustancia de inciio
#No se ve un patrón de dependencia entre el compromiso biopsicosocial y el estatus de egreso
# table(CONS_C1_df_dup_SEP_2020_women_miss$compromiso_biopsicosocial,
# CONS_C1_df_dup_SEP_2020_women_miss$motivodeegreso_mod_imp)
comp_biopsisoc_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$compromiso_biopsicosocial,
amelia_fit$imputations$imp2$compromiso_biopsicosocial,
amelia_fit$imputations$imp3$compromiso_biopsicosocial,
amelia_fit$imputations$imp4$compromiso_biopsicosocial,
amelia_fit$imputations$imp5$compromiso_biopsicosocial,
amelia_fit$imputations$imp6$compromiso_biopsicosocial,
amelia_fit$imputations$imp7$compromiso_biopsicosocial,
amelia_fit$imputations$imp8$compromiso_biopsicosocial,
amelia_fit$imputations$imp9$compromiso_biopsicosocial,
amelia_fit$imputations$imp10$compromiso_biopsicosocial,
amelia_fit$imputations$imp11$compromiso_biopsicosocial,
amelia_fit$imputations$imp12$compromiso_biopsicosocial,
amelia_fit$imputations$imp13$compromiso_biopsicosocial,
amelia_fit$imputations$imp14$compromiso_biopsicosocial,
amelia_fit$imputations$imp15$compromiso_biopsicosocial,
amelia_fit$imputations$imp16$compromiso_biopsicosocial,
amelia_fit$imputations$imp17$compromiso_biopsicosocial,
amelia_fit$imputations$imp18$compromiso_biopsicosocial,
amelia_fit$imputations$imp19$compromiso_biopsicosocial,
amelia_fit$imputations$imp20$compromiso_biopsicosocial,
amelia_fit$imputations$imp21$compromiso_biopsicosocial,
amelia_fit$imputations$imp22$compromiso_biopsicosocial,
amelia_fit$imputations$imp23$compromiso_biopsicosocial,
amelia_fit$imputations$imp24$compromiso_biopsicosocial,
amelia_fit$imputations$imp25$compromiso_biopsicosocial,
amelia_fit$imputations$imp26$compromiso_biopsicosocial,
amelia_fit$imputations$imp27$compromiso_biopsicosocial,
amelia_fit$imputations$imp28$compromiso_biopsicosocial,
amelia_fit$imputations$imp29$compromiso_biopsicosocial,
amelia_fit$imputations$imp30$compromiso_biopsicosocial,
amelia_fit$imputations$imp31$compromiso_biopsicosocial,
amelia_fit$imputations$imp32$compromiso_biopsicosocial,
amelia_fit$imputations$imp33$compromiso_biopsicosocial,
amelia_fit$imputations$imp34$compromiso_biopsicosocial,
amelia_fit$imputations$imp35$compromiso_biopsicosocial,
amelia_fit$imputations$imp36$compromiso_biopsicosocial,
amelia_fit$imputations$imp37$compromiso_biopsicosocial,
amelia_fit$imputations$imp38$compromiso_biopsicosocial,
amelia_fit$imputations$imp39$compromiso_biopsicosocial,
amelia_fit$imputations$imp40$compromiso_biopsicosocial,
amelia_fit$imputations$imp41$compromiso_biopsicosocial,
amelia_fit$imputations$imp42$compromiso_biopsicosocial,
amelia_fit$imputations$imp43$compromiso_biopsicosocial,
amelia_fit$imputations$imp44$compromiso_biopsicosocial,
amelia_fit$imputations$imp45$compromiso_biopsicosocial,
amelia_fit$imputations$imp46$compromiso_biopsicosocial,
amelia_fit$imputations$imp47$compromiso_biopsicosocial,
amelia_fit$imputations$imp48$compromiso_biopsicosocial,
amelia_fit$imputations$imp49$compromiso_biopsicosocial,
amelia_fit$imputations$imp50$compromiso_biopsicosocial,
amelia_fit$imputations$imp51$compromiso_biopsicosocial,
amelia_fit$imputations$imp52$compromiso_biopsicosocial,
amelia_fit$imputations$imp53$compromiso_biopsicosocial,
amelia_fit$imputations$imp54$compromiso_biopsicosocial,
amelia_fit$imputations$imp55$compromiso_biopsicosocial,
amelia_fit$imputations$imp56$compromiso_biopsicosocial,
amelia_fit$imputations$imp57$compromiso_biopsicosocial,
amelia_fit$imputations$imp58$compromiso_biopsicosocial,
amelia_fit$imputations$imp59$compromiso_biopsicosocial,
amelia_fit$imputations$imp60$compromiso_biopsicosocial,
amelia_fit$imputations$imp61$compromiso_biopsicosocial
) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
janitor::clean_names() %>%
dplyr::arrange(amelia_fit_imputations_imp1_row) %>%
dplyr::ungroup() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
# 1-Mild 2-Moderate 3-Severe
dplyr::summarise(severe_3=sum(value == "3-Severe",na.rm=T),
mod_2=sum(value == "2-Moderate",na.rm=T),
mild_1=sum(value =="1-Mild",na.rm=T)) %>%
dplyr::ungroup() %>%
dplyr::mutate(comp_biopsisoc_imp= dplyr::case_when(
(severe_3>mild_1) & (severe_3>mod_2)~"3-Severe",
(mod_2>mild_1) & (mod_2>severe_3)~"2-Moderate",
(mild_1>mod_2) & (mild_1>severe_3)~"1-Mild"
)) %>%
#2) Resolve ties
dplyr::mutate(ties= dplyr::case_when(is.na(comp_biopsisoc_imp)~1,T~0)) %>%
dplyr::mutate(comp_biopsisoc_imp= dplyr::case_when(ties==1 & ((severe_3>mod_2)|(severe_3>mild_1))~"3-Severe",
ties==1 & ((mod_2>mild_1)|(mod_2>severe_3))~"2-Moderate",
T~comp_biopsisoc_imp))
## `summarise()` ungrouping output (override with `.groups` argument)
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
##
#CONS_C1_df_dup_SEP_2020 %>% janitor::tabyl(motivodeegreso_mod_imp,evaluacindelprocesoteraputico)
CONS_C1_df_dup_SEP_2020_women_miss7<-
CONS_C1_df_dup_SEP_2020_women_miss6 %>%
dplyr::left_join(comp_biopsisoc_imputed[,c("amelia_fit_imputations_imp1_row","comp_biopsisoc_imp")], by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(compromiso_biopsicosocial=factor(dplyr::case_when(is.na(compromiso_biopsicosocial) ~comp_biopsisoc_imp,
TRUE~as.character(compromiso_biopsicosocial)))) %>%
dplyr::mutate(compromiso_biopsicosocial=parse_factor(as.character(compromiso_biopsicosocial),levels=c('1-Mild', '2-Moderate','3-Severe'), ordered =T,trim_ws=T,include_na =F, locale=locale(encoding = "UTF-8"))) %>%
dplyr::select(-comp_biopsisoc_imp) %>%
data.table()
if(nrow(CONS_C1_df_dup_SEP_2020_women_miss7)-nrow(CONS_C1_df_dup_SEP_2020_women_miss6)>0){
warning("AGS: Some rows were added in the imputation")}
As a result of the imputations, there were no missing values.
Tenure status of households
Another variable that is worth imputing is the Tenure status of households (n= 943). In case of ties, we selected the imputed values with the value with the minimum involvement. In case of ties, we kept what we thought was the most vulnerable value (discarding “Owner” or “Renting” values).
tenencia_de_la_vivienda_mod_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp2$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp3$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp4$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp5$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp6$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp7$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp8$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp9$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp10$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp11$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp12$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp13$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp14$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp15$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp16$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp17$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp18$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp19$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp20$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp21$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp22$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp23$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp24$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp25$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp26$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp27$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp28$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp29$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp30$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp31$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp32$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp33$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp34$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp35$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp36$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp37$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp38$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp39$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp40$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp41$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp42$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp43$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp44$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp45$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp46$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp47$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp48$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp49$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp50$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp51$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp52$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp53$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp54$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp55$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp56$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp57$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp58$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp59$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp60$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp61$tenencia_de_la_vivienda_mod
) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
janitor::clean_names() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row, value) %>%
tally() %>%
dplyr::ungroup() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::top_n(1,n) %>%
dplyr::ungroup()
#tenencia_de_la_vivienda_mod_imputed %>%
# pivot_wider(id_cols="amelia_fit_imputations_imp1_row",names_from="value", values_from="n", values_fill=0) %>%
# dplyr::ungroup()
tenencia_de_la_vivienda_mod_imputed_dup<-
tenencia_de_la_vivienda_mod_imputed %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::mutate(num=n()) %>%
dplyr::filter(num>1) %>%
dplyr::ungroup() %>%
#1) owner, discard if it is in the maximum
dplyr::mutate(n=dplyr::case_when(value=="Owner/Transferred dwellings/Pays Dividends"~0,T~as.numeric(n))) %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::top_n(1,n) %>%
dplyr::ungroup() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
#2) Renting vs. stays temporarily with a relative, keep the second
dplyr::mutate(n=dplyr::case_when(value=="Renting"~0,T~as.numeric(n))) %>%
dplyr::top_n(1,n) %>%
dplyr::ungroup() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::mutate(n_dup=n())
tenencia_de_la_vivienda_mod_imputed_final<-
tenencia_de_la_vivienda_mod_imputed %>%
dplyr::left_join(tenencia_de_la_vivienda_mod_imputed_dup, by=c("amelia_fit_imputations_imp1_row", "value")) %>%
#si es vacío, y no está en la base, es valor 0 (es difícil que)
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::mutate(sum= suppressWarnings(max(num, na.rm=T))) %>%
dplyr::ungroup() %>%
#descarto los que presentaron más de un valor para una misma fila y aquellos que no fueron seleccionados
dplyr::mutate(descartar=dplyr::case_when(sum>1 & is.na(n.y)~1,T~0)) %>%
dplyr::filter(descartar==0)
ifelse(nrow(tenencia_de_la_vivienda_mod_imputed_final)/length(unique(CONS_C1_df_dup_SEP_2020_women_miss7$row))>1,
"There are still more than one value in the imputation","")
## [1] ""
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
#CONS_C1_df_dup_SEP_2020 %>% janitor::tabyl(motivodeegreso_mod_imp,evaluacindelprocesoteraputico)
CONS_C1_df_dup_SEP_2020_women_miss8<-
CONS_C1_df_dup_SEP_2020_women_miss7 %>%
dplyr::left_join(tenencia_de_la_vivienda_mod_imputed_final[,c("amelia_fit_imputations_imp1_row","value")], by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(tenencia_de_la_vivienda_mod=factor(dplyr::case_when(is.na(tenencia_de_la_vivienda_mod) ~value,
TRUE~as.character(tenencia_de_la_vivienda_mod)))) %>%
dplyr::select(-value) %>%
data.table()
if(nrow(CONS_C1_df_dup_SEP_2020_women_miss8)-nrow(CONS_C1_df_dup_SEP_2020_women_miss7)>0){
warning("AGS: Some rows were added in the imputation")}
As a result of the imputations, there were no missing values.
Number of children (max. Value) (Dichotomized)
A numeric variable that had a great proportion of missing values was this (n= 82).
As seen in the figure above, most of the imputations were around 1 and 3 children, leaving less space for an imputation of no children or more than 3. We imputed these values, by approximating the mean of the 61 candidate values to a discrete number.
numero_de_hijos_mod_rec_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp2$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp3$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp4$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp5$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp6$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp7$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp8$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp9$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp10$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp11$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp12$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp13$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp14$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp15$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp16$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp17$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp18$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp19$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp20$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp21$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp22$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp23$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp24$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp25$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp26$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp27$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp28$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp29$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp30$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp31$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp32$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp33$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp34$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp35$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp36$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp37$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp38$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp39$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp40$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp41$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp42$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp43$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp44$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp45$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp46$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp47$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp48$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp49$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp50$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp51$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp52$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp53$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp54$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp55$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp56$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp57$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp58$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp59$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp60$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp61$numero_de_hijos_mod_rec
) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
janitor::clean_names() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::summarise(children= sum(value=="Yes"),
no_children= sum(value=="No")) %>%
dplyr::mutate(numero_de_hijos_mod_rec_imp=dplyr::case_when(children>=31~"Yes",
no_children>=31~"No"))
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_women_miss9<-
CONS_C1_df_dup_SEP_2020_women_miss8 %>%
dplyr::left_join(dplyr::select(numero_de_hijos_mod_rec_imputed,amelia_fit_imputations_imp1_row,numero_de_hijos_mod_rec_imp), by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(numero_de_hijos_mod_rec=factor(dplyr::case_when(is.na(numero_de_hijos_mod_rec)~as.character(numero_de_hijos_mod_rec_imp),T~as.character(numero_de_hijos_mod_rec)))) %>%
dplyr::select(-numero_de_hijos_mod_rec_imp) %>%
data.table()
#table(is.na(CONS_C1_df_dup_SEP_2020_women_miss12$numero_de_hijos_mod_rec))
if(nrow(CONS_C1_df_dup_SEP_2020_women_miss9)-nrow(CONS_C1_df_dup_SEP_2020_women_miss8)>0){
warning("AGS: Some rows were added in the imputation")}
As a result of the imputations, there were no missing values.
Type of Program
A numeric variable that was important to impute missing values was the type of program (n= 17).
tipo_de_programa_2_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$tipo_de_programa_2,
amelia_fit$imputations$imp2$tipo_de_programa_2,
amelia_fit$imputations$imp3$tipo_de_programa_2,
amelia_fit$imputations$imp4$tipo_de_programa_2,
amelia_fit$imputations$imp5$tipo_de_programa_2,
amelia_fit$imputations$imp6$tipo_de_programa_2,
amelia_fit$imputations$imp7$tipo_de_programa_2,
amelia_fit$imputations$imp8$tipo_de_programa_2,
amelia_fit$imputations$imp9$tipo_de_programa_2,
amelia_fit$imputations$imp10$tipo_de_programa_2,
amelia_fit$imputations$imp11$tipo_de_programa_2,
amelia_fit$imputations$imp12$tipo_de_programa_2,
amelia_fit$imputations$imp13$tipo_de_programa_2,
amelia_fit$imputations$imp14$tipo_de_programa_2,
amelia_fit$imputations$imp15$tipo_de_programa_2,
amelia_fit$imputations$imp16$tipo_de_programa_2,
amelia_fit$imputations$imp17$tipo_de_programa_2,
amelia_fit$imputations$imp18$tipo_de_programa_2,
amelia_fit$imputations$imp19$tipo_de_programa_2,
amelia_fit$imputations$imp20$tipo_de_programa_2,
amelia_fit$imputations$imp21$tipo_de_programa_2,
amelia_fit$imputations$imp22$tipo_de_programa_2,
amelia_fit$imputations$imp23$tipo_de_programa_2,
amelia_fit$imputations$imp24$tipo_de_programa_2,
amelia_fit$imputations$imp25$tipo_de_programa_2,
amelia_fit$imputations$imp26$tipo_de_programa_2,
amelia_fit$imputations$imp27$tipo_de_programa_2,
amelia_fit$imputations$imp28$tipo_de_programa_2,
amelia_fit$imputations$imp29$tipo_de_programa_2,
amelia_fit$imputations$imp30$tipo_de_programa_2,
amelia_fit$imputations$imp31$tipo_de_programa_2,
amelia_fit$imputations$imp32$tipo_de_programa_2,
amelia_fit$imputations$imp33$tipo_de_programa_2,
amelia_fit$imputations$imp34$tipo_de_programa_2,
amelia_fit$imputations$imp35$tipo_de_programa_2,
amelia_fit$imputations$imp36$tipo_de_programa_2,
amelia_fit$imputations$imp37$tipo_de_programa_2,
amelia_fit$imputations$imp38$tipo_de_programa_2,
amelia_fit$imputations$imp39$tipo_de_programa_2,
amelia_fit$imputations$imp40$tipo_de_programa_2,
amelia_fit$imputations$imp41$tipo_de_programa_2,
amelia_fit$imputations$imp42$tipo_de_programa_2,
amelia_fit$imputations$imp43$tipo_de_programa_2,
amelia_fit$imputations$imp44$tipo_de_programa_2,
amelia_fit$imputations$imp45$tipo_de_programa_2,
amelia_fit$imputations$imp46$tipo_de_programa_2,
amelia_fit$imputations$imp47$tipo_de_programa_2,
amelia_fit$imputations$imp48$tipo_de_programa_2,
amelia_fit$imputations$imp49$tipo_de_programa_2,
amelia_fit$imputations$imp50$tipo_de_programa_2,
amelia_fit$imputations$imp51$tipo_de_programa_2,
amelia_fit$imputations$imp52$tipo_de_programa_2,
amelia_fit$imputations$imp53$tipo_de_programa_2,
amelia_fit$imputations$imp54$tipo_de_programa_2,
amelia_fit$imputations$imp55$tipo_de_programa_2,
amelia_fit$imputations$imp56$tipo_de_programa_2,
amelia_fit$imputations$imp57$tipo_de_programa_2,
amelia_fit$imputations$imp58$tipo_de_programa_2,
amelia_fit$imputations$imp59$tipo_de_programa_2,
amelia_fit$imputations$imp60$tipo_de_programa_2,
amelia_fit$imputations$imp61$tipo_de_programa_2
) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
janitor::clean_names() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::summarise(WE= sum(value=="Women specific"),
GP= sum(value=="General population")) %>%
dplyr::mutate(tipo_de_programa_2_imp=dplyr::case_when(WE>=31~"Women specific",
GP>=31~"General population"))
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_women_miss10<-
CONS_C1_df_dup_SEP_2020_women_miss9 %>%
dplyr::left_join(dplyr::select(tipo_de_programa_2_imputed,amelia_fit_imputations_imp1_row,tipo_de_programa_2_imp), by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(tipo_de_programa_2=factor(dplyr::case_when(is.na(tipo_de_programa_2)~as.character(tipo_de_programa_2_imp),T~as.character(tipo_de_programa_2)))) %>%
dplyr::select(-tipo_de_programa_2_imp) %>%
data.table()
#table(is.na(CONS_C1_df_dup_SEP_2020_women_miss12$tipo_de_programa_2))
if(nrow(CONS_C1_df_dup_SEP_2020_women_miss10)-nrow(CONS_C1_df_dup_SEP_2020_women_miss9)>0){
warning("AGS: Some rows were added in the imputation")}
As a result of the imputations, there were no missing values.
Sample Characteristics
We checked the characteristics of the sample depending on type of treatment (Residential or Outpatients).
#añado los imputados
CONS_C1_df_dup_SEP_2020_women_miss_after_imp<-
CONS_C1_df_dup_SEP_2020_women_miss10 %>%
# relocate(otras_sus1_mod, .after = last_col()) %>%
dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020, row, fech_ing, fech_egres_imp, fech_ing_num, fech_egres_num, dias_treat_imp_sin_na,fech_ing_next_treat), by="row")%>%
#dplyr::filter(fech_egres_num==18213,!is.na(motivodeegreso_mod_imp)) %>%
dplyr::mutate(motivodeegreso_mod_imp=factor(dplyr::case_when(dias_treat_imp_sin_na>=90 & motivodeegreso_mod_imp=="Drop-out"~ "Late Drop-out",
dias_treat_imp_sin_na<90 & motivodeegreso_mod_imp=="Drop-out"~ "Early Drop-out",
fech_egres_num==18213 & is.na(motivodeegreso_mod_imp)~"Ongoing treatment",
TRUE~as.character(motivodeegreso_mod_imp)
))) %>%
dplyr::mutate(sum_miss = base::rowSums(is.na(dplyr::select(.,c(tipo_de_programa_2:motivodeegreso_mod_imp))))) %>%
dplyr::group_by(hash_key) %>%
dplyr::mutate(sum_miss=sum(sum_miss)) %>%
dplyr::ungroup()
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_descartados <-
CONS_C1_df_dup_SEP_2020_women_miss_after_imp %>%
dplyr::filter(sum_miss>0)
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_descartados %>%
rowwise %>%
dplyr::mutate_at(.vars = vars(vector_variables),
.funs = ~ifelse(is.na(.), 1, 0)) %>%
dplyr::ungroup() %>%
dplyr::summarise_at(vars(vector_variables),~sum(.)) %>%
melt
## No id variables; using all as measure variables
## variable value
## 1 tipo_de_programa_2 0
## 2 estado_conyugal_2 0
## 3 edad_al_ing_grupos 0
## 4 escolaridad_rec 0
## 5 sus_principal_mod 0
## 6 freq_cons_sus_prin 0
## 7 compromiso_biopsicosocial 0
## 8 tenencia_de_la_vivienda_mod 0
## 9 num_otras_sus_mod 0
## 10 numero_de_hijos_mod_rec 0
## 11 motivodeegreso_mod_imp 0
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#BASE DE DATOS DEFINITIVA#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
CONS_C1_df_dup_SEP_2020_women_miss_after_imp %>%
dplyr::filter(sum_miss==0) %>%
dplyr::select(-sum_miss) %>%
#DAR FORMATO ORDINAL A LAS VARIABLES
dplyr::mutate(edad_al_ing_grupos=parse_factor(as.character(edad_al_ing_grupos),levels=c('<18-29', '30-39', '40-49', '50+'), ordered =T,trim_ws=F,include_na =F)) %>% #, locale=locale(encoding = "Latin1")
dplyr::mutate(escolaridad_rec=parse_factor(as.character(escolaridad_rec),levels=c('3-Completed primary school or less', '2-Completed high school or less', '1-More than high school'), ordered =T,trim_ws=T,include_na =F, locale=locale(encoding = "Latin1"))) %>%
dplyr::mutate(freq_cons_sus_prin=parse_factor(as.character(freq_cons_sus_prin),levels=c('Less than 1 day a week','2 to 3 days a week','4 to 6 days a week','1 day a week or more','Daily'), ordered =T,trim_ws=F,include_na =F)) %>% #, locale=locale(encoding = "Latin1")
dplyr::mutate(compromiso_biopsicosocial=parse_factor(as.character(compromiso_biopsicosocial),levels=c('1-Mild', '2-Moderate','3-Severe'), ordered =T,trim_ws=F,include_na =F)) %>% #, locale=locale(encoding = "Latin1")
dplyr::mutate(num_otras_sus_mod=parse_factor(as.character(num_otras_sus_mod),levels=c('No additional substance', 'One additional substance','More than one additional substance'), ordered =T,trim_ws=F,include_na =F)) %>% #, locale=locale(encoding = "Latin1")
dplyr::mutate(fech_ing_next_treat_date=as.Date(fech_ing_next_treat)) %>%
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
data.table::data.table() %>%
assign("CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados",.,envir=.GlobalEnv)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
attr(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados$tipo_de_programa_2,"label") <- 'Type of program'
attr(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados$edad_al_ing_grupos,"label") <- 'Age at admission to treatment, grouped.'
attr(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados$estado_conyugal_2,"label") <- 'Marital status'
attr(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados$numero_de_hijos_mod_rec,"label") <- 'Have children (Dichotomized)'
attr(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados$sus_principal_mod,"label") <- 'Primary or main substance'
attr(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados$num_otras_sus_mod,"label") <- 'Co-occurring SUD'
attr(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados$freq_cons_sus_prin,"label") <- 'Consumption frequency of primary or main substance'
attr(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados$compromiso_biopsicosocial,"label") <- 'Biopsychosocial involvement'
attr(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados$tenencia_de_la_vivienda_mod,"label") <- 'Tenure status of households'
attr(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados$escolaridad_rec,"label") <- 'Educational Attainment'
attr(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados$motivodeegreso_mod_imp,"label") <- 'Cause of Discharge'
attr(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados$fech_ing_next_treat_date,"label") <- 'Date of Admission to Posterior Treatment'
#attr(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados$tipo_de_plan_res,"label") <- 'Setting of Treatment'
#attr(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados$tipo_centro,"label") <- 'Type of center of the last entry'
#attr(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados$embarazo,"label") <- 'Pregnant at admission'
#attr(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados$edad_ini_sus_prin_grupos,"label") <- 'Age at first use of principal substance, grouped'
We ended the process having 21,423 compelte cases (users= 21,423).
kableone <- function(x, ...) {
capture.output(x <- print(x,...))
knitr::kable(x,format= "html", format.args= list(decimal.mark= ".", big.mark= ","))
}
Variables_after_imp<-c("estado_conyugal_2","edad_al_ing_grupos","escolaridad_rec","sus_principal_mod","freq_cons_sus_prin","compromiso_biopsicosocial","tenencia_de_la_vivienda_mod","num_otras_sus_mod","numero_de_hijos_mod_rec","motivodeegreso_mod_imp","dias_treat_imp_sin_na")
catVars_after_imp<-
c("estado_conyugal_2","edad_al_ing_grupos","escolaridad_rec","sus_principal_mod","freq_cons_sus_prin","compromiso_biopsicosocial","tenencia_de_la_vivienda_mod","num_otras_sus_mod","numero_de_hijos_mod_rec","motivodeegreso_mod_imp")
pre_tab1<-Sys.time()
tab1<-
CreateTableOne(vars = Variables_after_imp,
strata = "tipo_de_programa_2",
data = CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados,
factorVars = catVars_after_imp,
smd=T)
post_tab1<-Sys.time()
diff_time_tab1=post_tab1-pre_tab1
kableone(tab1,
caption = paste0("Table 4. Covariate Balance in the Variables of Interest"),
col.names= c("Variables","Ambulatory","Residential", "p-values","test","SMD"),
nonnormal= c("edad_ini_cons","edad_al_ing","fech_ing_num"),#"\\hline",
smd=T, test=T, varLabels=T,noSpaces=T, printToggle=T, dropEqual=F) %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover","condensed"),font_size= 11) %>%
#()
row_spec(1, bold = T, italic =T,color ="black",hline_after=T,extra_latex_after="\\arrayrulecolor{white}",font_size= 11) %>%
scroll_box(width = "100%", height = "400px")
#"tipo_de_plan_ambulatorio",
#https://cran.r-project.org/web/packages/tableone/vignettes/smd.html
#http://rstudio-pubs-static.s3.amazonaws.com/405765_2ce448f9bde24148a5f94c535a34b70e.html
#https://cran.r-project.org/web/packages/tableone/vignettes/introduction.html
#https://cran.r-project.org/web/packages/tableone/tableone.pdf
#https://www.rdocumentation.org/packages/tableone/versions/0.12.0/topics/CreateTableOne
## Construct a table
#standardized mean differences of greater than 0.1
Multi-state
# dplyr::filter(motivodeegreso_mod_imp!="En curso")%>% #Sacar los tratamientos que estén en curso
tab1_lab<- paste0('Original C1 Dataset \n(n = ', formatC(nrow(CONS_C1), format='f', big.mark=',', digits=0), ';\nusers: ',formatC(CONS_C1%>% dplyr::distinct(HASH_KEY)%>% nrow(), format='f', big.mark=',', digits=0),')')
tab2_lab<- paste0('C1 Dataset \n(n = ', formatC(nrow(CONS_C1_df_dup_SEP_2020), format='f', big.mark=',', digits=0), ';\nusers: ',formatC(CONS_C1_df_dup_SEP_2020%>% dplyr::distinct(hash_key)%>% nrow(), format='f', big.mark=',', digits=0),')')
tab1_5_lab<- paste0('• Duplicated entries\\l • Overlapping treatments of users\\l • Intermediate events of treatment (continuous referrals)')
tab22_lab<- paste0('C1 Dataset \n(n = ', formatC(nrow(CONS_C1_df_dup_SEP_2020_women), format='f', big.mark=',', digits=0), ';\nusers: ',formatC(CONS_C1_df_dup_SEP_2020_women%>% dplyr::distinct(hash_key)%>% nrow(), format='f', big.mark=',', digits=0),')')
tab32_lab <- paste0('•Select women (n= ',formatC(CONS_C1_df_dup_SEP_2020%>% dplyr::filter(sexo_2=="Women") %>% nrow(), format='f', big.mark=',', digits=0), '; users=',formatC(CONS_C1_df_dup_SEP_2020%>% dplyr::filter(sexo_2=="Women") %>% distinct(hash_key) %>% nrow(), format='f', big.mark=',', digits=0),')\\\\l•Select first treatments (n= ',formatC(CONS_C1_df_dup_SEP_2020%>% dplyr::filter(sexo_2=="Women", dup==1) %>% nrow(), format='f', big.mark=',', digits=0), '; users=',formatC(CONS_C1_df_dup_SEP_2020%>% dplyr::filter(sexo_2=="Women", dup==1) %>% distinct(hash_key) %>% nrow(), format='f', big.mark=',', digits=0),')\\\\l')
tab3_5_lab<- paste0('C1 Dataset \n(n = ', formatC(nrow(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_descartados), format='f', big.mark=',', digits=0), ';\nusers: ',formatC(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_descartados%>% dplyr::distinct(hash_key)%>% nrow(), format='f', big.mark=',', digits=0),')')
tab_miss <-paste0(" Impute missing values: (n=",CONS_C1_df_dup_SEP_2020_women_miss[,..vector_variables_only_for_imputation] %>% complete.cases() %>% janitor::tabyl() %>% dplyr::filter(.=="FALSE") %>% dplyr::select(n) %>% as.numeric() %>% format(big.mark=","),")")
tab4_lab<- paste0('Imputed C1 Dataset \n(n = ', formatC(nrow(CONS_C1_df_dup_SEP_2020_women_miss_after_imp), format='f', big.mark=',', digits=0), ';\nusers: ',formatC(CONS_C1_df_dup_SEP_2020_women_miss_after_imp%>% dplyr::distinct(hash_key)%>% nrow(), format='f', big.mark=',', digits=0),')')
tab_ref_lab <- tab_miss <-paste0("• Remove Referrals: (n=",CONS_C1_df_dup_SEP_2020_women_miss_after_imp %>% dplyr::filter(motivodeegreso_mod_imp=="Referral to another treatment") %>% nrow() %>% format(big.mark=","),")\\\\l")
tab_final <- tab_miss <-paste0(" Imputed C1 Dataset w/o Referrals: \n(n=",CONS_C1_df_dup_SEP_2020_women_miss_after_imp %>% dplyr::filter(motivodeegreso_mod_imp!="Referral to another treatment") %>% nrow() %>% format(big.mark=","),'\n users= ',CONS_C1_df_dup_SEP_2020_women_miss_after_imp %>% dplyr::filter(motivodeegreso_mod_imp!="Referral to another treatment") %>% distinct(hash_key) %>% nrow() %>% format(big.mark=","),")")
#https://stackoverflow.com/questions/46750364/diagrammer-and-graphviz
#https://mikeyharper.uk/flowcharts-in-r-using-diagrammer/
#http://blog.nguyenvq.com/blog/2012/05/29/better-decision-tree-graphics-for-rpart-via-party-and-partykit/
#http://blog.nguyenvq.com/blog/2014/01/17/skeleton-to-create-fast-automatic-tree-diagrams-using-r-and-graphviz/
#https://cran.r-project.org/web/packages/DiagrammeR/vignettes/graphviz-mermaid.html
#https://stackoverflow.com/questions/39133058/how-to-use-graphviz-graphs-in-diagrammer-for-r
#https://subscription.packtpub.com/book/big_data_and_business_intelligence/9781789802566/1/ch01lvl1sec21/creating-diagrams-via-the-diagrammer-package
#https://justlegal.be/2019/05/using-flowcharts-to-display-legal-procedures/
# paste0("No. of treatments: ",table(table(t_id_1)) %>% formatC(big.mark = ","),"; No. of controls: ",table(table(c_id_1))%>% formatC(big.mark = ","))
#
library(DiagrammeR) #⋉
grViz("digraph flowchart {
# node definitions with substituted label text
node [fontname = Times, shape = rectangle,fontsize = 9]
tab1 [label = '@@1']
tab2 [label = '@@2']
tab32 [label = '@@5',fontsize = 7]
tab22 [label = '@@6']
tab3 [label = '•Duplicated entries\\l•Intermediate events of treatment (continuous referrals)\\l',fontsize = 7]
tab4 [label = '@@4']
blank [label = '', width = 0.0001, height = 0.0001]
blank1 [label = '', width = 0.0001, height = 0.0001]
blank2 [label = '', width = 0.0001, height = 0.0001]
blank3 [label = '', width = 0.0001, height = 0.0001]
tab5 [label = '•Logically inconsistent candidates for imputation\\l•Ties in candidates for imputation\\l',fontsize = 7]
tab6 [label = '@@8',fontsize = 7,style=dotted]
tab7 [label = '@@9',style=dotted]
# edge definitions with the node IDs
tab1 -> blank [arrowhead = none,label=' Data wrangling and normalization process',fontsize = 8];
blank -> tab3
blank -> tab2
tab2 -> blank1 [arrowhead = none,label=' Sample selection',fontsize = 8];
blank1 -> tab32
blank1 -> tab22
tab22 -> blank2 [arrowhead = none, label= '@@7', fontsize= 8];
blank2 -> tab5
blank2 -> tab4 [label=' Result of the imputation of missing values',fontsize = 8];
tab4 -> blank3 [arrowhead = none,label=' Unkown exact times of discharge',fontsize = 8, style=dotted];
blank3 -> tab6 [style=dotted]
blank3 -> tab7 [style=dotted]
subgraph {
rank = same; tab3; blank;
}
subgraph {
rank = same; tab32; blank1;
}
subgraph {
rank = same; tab5; blank2;
}
subgraph {
rank = same; tab6; blank3;
}
}
[1]: tab1_lab
[2]: tab2_lab
[3]: tab1_5_lab
[4]: tab4_lab
[5]: tab32_lab
[6]: tab22_lab
[7]: tab_miss
[8]: tab_ref_lab
[9]: tab_final
")
# {rank=same; 'tab2'' -> tab3 [label='',fontsize = 11]}; #⋉
#CONS_C1_df_dup_SEP_2020_irrs_health
Transition matrix
library(igraph)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
## 3 ESTADOS SIMPLES ##
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
links<-data.frame(stringsAsFactors=FALSE,
from = c(rep("Admitted to\nTreatment", 2),"Therapeutic\nDischarge"),
to = c(rep(c("Therapeutic\nDischarge"),1), rep("Readmission", 2)),
value = c("1","2","3"))
links2<-data.frame(stringsAsFactors=FALSE,
from = c(rep("Admitted to\nTreatment", 3),"Therapeutic\nDischarge","Discharge w/o\nClinical Advice"),
to = c(rep(c("Therapeutic\nDischarge","Discharge w/o\nClinical Advice"),1), rep("Readmission", 3)),
value = c("1","2","3","4","5"))
#dev.off()
#https://www.r-graph-gallery.com/248-igraph-plotting-parameters.html
#https://rstudio-pubs-static.s3.amazonaws.com/341807_7b9d1a4e787146d492115f90ad75cd2d.html
par(mfrow=c(1, 2))
#for (i in c(2560:2660)){
set.seed(2630) #i #2660 #2646 #2642 #2630 #2650
co <- layout.fruchterman.reingold(graph_from_data_frame(links, directed=TRUE))
plot(graph_from_data_frame(links, directed=TRUE),
asp = 0,
layout= co,
edge.label=links$value,
edge.label.cex=3,
edge.label.color="black",
#vertex.label= rev(),
vertex.color="white",
vertex.size=120,
vertex.size2=25,
vertex.label.cex=1,
edge.arrow.size=1,
edge.color="black",
vertex.shape="rectangle",
vertex.label.color="black",
edge.curved=0,
edge.width=1.5,
#main=paste0(i),
outputorder="edgesfirst",
vertex.label.dist=0,
vertex.cex = 3)
#}
title("a) Three-states Model (Simplest)", sub = "No recurring states; Absorving state: Readmission;\nOther causes of discharge are not events of interest;\nModdified version of an illness-death model") ## internal titles
set.seed(10990)#i #10990 10921 10898 10835
co2 <- layout.fruchterman.reingold(graph_from_data_frame(links2, directed=TRUE))
plot(graph_from_data_frame(links2, directed=TRUE),
asp = 0,
layout= co2,
edge.label=links2$value,
edge.label.cex=2.5,
edge.label.color="black",
#vertex.label= rev(),
vertex.color="white",
vertex.size=120,
vertex.size2=25,
vertex.label.cex=1,
edge.arrow.size=1,
edge.color="black",
vertex.shape="rectangle",
vertex.label.color="black",
edge.curved=0,
edge.width=1.5,
outputorder="edgesfirst",
#main=paste0(i),
vertex.label.dist=0,
vertex.cex = 3)
title("b) Four-states Model", sub = "No recurring states; Absorving state: Readmission;\n For now, includes Referrals as a discharge /wo Clinical\n Advice") ## internal titles

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
## Probando con paquetes estadísticos
if(no_mostrar==1){
library(igraph)
Nodes <- c("Admitted to\nGP","Admitted to\nWE","Therapeutic\nDischarge","Discharge w/o\nClinical Advice","Readmission") #states possible in MSM
Edges <- list("Admitted to\nGP"=list(edges=c("Therapeutic\nDischarge","Discharge w/o\nClinical Advice")),
"Admitted to\nWE"=list(edges=c("Therapeutic\nDischarge","Discharge w/o\nClinical Advice")),
"Therapeutic\nDischarge"=list(edges=c("Readmission")),
"Discharge /wo\nClinical Advice"=list(edges=c("Readmission")),
"Readmission"=list(edges=NULL)) #transitions from each state
RCLTtree <- new("graphNEL",nodes=Nodes,edgeL=Edges,edgemode="directed")
plot(RCLTtree)
#https://www.rdocumentation.org/packages/msSurv/versions/1.2-2/topics/msSurv
msSurv(LTRCdata, RCLTtree, cens.type="ind", LT=FALSE, bs=FALSE, B=200)
}
#Data should be in a data frame with column names "id", "stop", "st.stage", and "stage" where "id" is the individual's identification number, "stop" is the transition time from state j to j', "st.stage" is the state the individual is transitioning from (i.e., j), and "stage" is the state the individual is transitioning to (i.e., j') and equals 0 if right censored.
## 80% of sample is LT, rest has start time of 0
### AGS: Parten en 0, salvo que estén truncados a la izquierda.
### Parece que todos comparten un mismo tiempo ojetivo.
### AGS: Cuando hay un estado seguido no es necesario interval censoring, se dn en tun tiempo continuo
### El 0 es censura
library(mstate)
## Warning: package 'mstate' was built under R version 4.0.3
#### MSPREP= 3 STATES
#_#_#_#_#_#_#_
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep<-
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados %>%
dplyr::mutate(ther_disch=ifelse(motivodeegreso_mod_imp=="Therapeutic discharge",1,0)) %>%
# para problema para intervalos 0 entre un tratamiento nuevo y otro y entre el ingreso a un tratamiento y su término y que comento más abajo
dplyr::mutate(cambio_fecha_ing= dplyr::case_when(dias_treat_imp_sin_na==0~1 ,T~0)) %>%
dplyr::mutate(fech_ing_num= dplyr::case_when(cambio_fecha_ing==1~fech_ing_num-1,T~fech_ing_num)) %>%
dplyr::mutate(dias_treat_imp_sin_na= dplyr::case_when(cambio_fecha_ing==1~fech_egres_num-fech_ing_num, T~dias_treat_imp_sin_na)) %>%
dplyr::select(-cambio_fecha_ing) %>%
#If status=1, the corresponding transition has been observed. Si no no se ha observado
# para efectos de este modelo simple, experimentar el alta terapéutica es el objetivo, por lo que el resto será censura
dplyr::mutate(readmission=ifelse(!is.na(fech_ing_next_treat),1,0)) %>%
#time of arrival at the state
dplyr::mutate(diff_bet_treat=fech_ing_next_treat-fech_egres_num) %>%
# para problema para intervalos 0 entre un tratamiento nuevo y otro y entre el ingreso a un tratamiento y su término y que comento más abajo. Le añado un día
dplyr::mutate(cambio_fecha_ing_nuevo_t= dplyr::case_when(diff_bet_treat==0~1 ,T~0)) %>%
dplyr::mutate(fech_ing_next_treat= dplyr::case_when(cambio_fecha_ing_nuevo_t==1~fech_ing_next_treat+1,T~fech_ing_next_treat)) %>%
dplyr::mutate(diff_bet_treat= dplyr::case_when(cambio_fecha_ing_nuevo_t==1~fech_ing_next_treat-fech_egres_num, T~diff_bet_treat)) %>%
dplyr::select(-cambio_fecha_ing_nuevo_t) %>%
#dplyr::filter(diff_bet_treat_corr!=diff_bet_treat)
#necesito dejarles un número de días a los casos censurados, por lo que tendré que calcularlo
dplyr::mutate(dias_treat_imp_sin_na= dplyr::case_when(ther_disch==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num,T~dias_treat_imp_sin_na)) %>%
dplyr::mutate(diff_bet_treat= dplyr::case_when(readmission==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num,T~diff_bet_treat)) %>%
## EL RESTO DE LOS PACIENTES NO VAN A HABER REGISTRADO EVENTOS EN ESE TIEMPO, POR LO QUE LLEGARÁN AL FINAL DEL SEGUIMIENTO
dplyr::select(row, fech_ing_num, dias_treat_imp_sin_na, ther_disch, diff_bet_treat, readmission, tipo_de_programa_2:numero_de_hijos_mod_rec) %>%
as.data.frame()
#### MSPREP= 4 STATES
#_#_#_#_#_#_#_
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep2<-
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados %>%
dplyr::mutate(ther_disch=ifelse(motivodeegreso_mod_imp=="Therapeutic discharge",1,0)) %>%
# para problema para intervalos 0 entre un tratamiento nuevo y otro y entre el ingreso a un tratamiento y su término y que comento más abajo
dplyr::mutate(cambio_fecha_ing= dplyr::case_when(dias_treat_imp_sin_na==0~1 ,T~0)) %>%
dplyr::mutate(fech_ing_num= dplyr::case_when(cambio_fecha_ing==1~fech_ing_num-1,T~fech_ing_num)) %>%
dplyr::mutate(dias_treat_imp_sin_na= dplyr::case_when(cambio_fecha_ing==1~fech_egres_num-fech_ing_num, T~dias_treat_imp_sin_na)) %>%
dplyr::select(-cambio_fecha_ing) %>%
#If status=1, the corresponding transition has been observed. Si no no se ha observado
dplyr::mutate(disch_wo_clin_adv=ifelse(motivodeegreso_mod_imp %in% c("Early Drop-out","Late Drop-out","Administrative discharge"),1,0)) %>%
# para efectos de este modelo simple, experimentar el alta terapéutica es el objetivo, por lo que el resto será censura
dplyr::mutate(readmission=ifelse(!is.na(fech_ing_next_treat),1,0)) %>%
#time of arrival at the state
dplyr::mutate(diff_bet_treat=fech_ing_next_treat-fech_egres_num) %>%
# para problema para intervalos 0 entre un tratamiento nuevo y otro y entre el ingreso a un tratamiento y su término y que comento más abajo. Le añado un día
dplyr::mutate(cambio_fecha_ing_nuevo_t= dplyr::case_when(diff_bet_treat==0~1 ,T~0)) %>%
dplyr::mutate(fech_ing_next_treat= dplyr::case_when(cambio_fecha_ing_nuevo_t==1~fech_ing_next_treat+1,T~fech_ing_next_treat)) %>%
dplyr::mutate(diff_bet_treat= dplyr::case_when(cambio_fecha_ing_nuevo_t==1~fech_ing_next_treat-fech_egres_num, T~diff_bet_treat)) %>%
dplyr::select(-cambio_fecha_ing_nuevo_t) %>%
#dplyr::filter(diff_bet_treat_corr!=diff_bet_treat)
#necesito dejarles un número de días a los casos censurados, por lo que tendré que calcularlo
dplyr::mutate(days_to_ther_disch= dplyr::case_when(ther_disch==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num,T~dias_treat_imp_sin_na)) %>%
dplyr::mutate(days_to_disch_wo_clin_adv= dplyr::case_when(disch_wo_clin_adv==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num,T~dias_treat_imp_sin_na)) %>%
dplyr::mutate(diff_bet_treat= dplyr::case_when(readmission==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num,T~diff_bet_treat)) %>%
## EL RESTO DE LOS PACIENTES NO VAN A HABER REGISTRADO EVENTOS EN ESE TIEMPO, POR LO QUE LLEGARÁN AL FINAL DEL SEGUIMIENTO
dplyr::select(row, fech_ing_num, days_to_ther_disch, ther_disch, days_to_disch_wo_clin_adv, disch_wo_clin_adv, diff_bet_treat, readmission, tipo_de_programa_2:numero_de_hijos_mod_rec) %>%
as.data.frame()
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_##_#_#_#_#_#_#_##_#_#_#_#_#_#_#
#_#_#_#_#_#_#_##_#_#_#_#_#_#_##_#_#_#_#_#_#_##_#_#_#_#_#_#_#
#_#_#_#_#_#_#_##_#_#_#_#_#_#_##_#_#_#_#_#_#_##_#_#_#_#_#_#_#
#For censored state transitions it can be awkward having to replicate the censoring time for each non-visited state
#https://github.com/stulacy/multistateutils
mat_3_states <- trans.illdeath(names=c("Admission","Therapeutic\nDischarge","Readmission"))
#All possible paths through the multi-state model can be found here:
#paths(mat_3_states)
#Los números son determinados por posición en cada columna (o eje Y).
#Si uno quiere definir para la otra fila, salta al siguiente vector:
mat_4_states<- transMat(list(c(2,3,4), 4, 4, c()),
names = c("Admission", "Therapeutic\nDischarge", "Discharge Without\nMedical Advice", "Readmission"))
#illness-death model without recovery
ms_CONS_C1_SEP_2020_women_imputed <- msprep(time = c(NA, "dias_treat_imp_sin_na", "diff_bet_treat"),
status = c(NA, "ther_disch", "readmission"),
data = CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep,
id = "row",
trans = mat_3_states,
keep = names(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep)[-c(1:6)])
#model of 5 states without recovery.
ms2_CONS_C1_SEP_2020_women_imputed <- msprep(time = c(NA, "days_to_ther_disch", "days_to_disch_wo_clin_adv", "diff_bet_treat"),
status = c(NA, "ther_disch", "disch_wo_clin_adv", "readmission"),
data = CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep2,
id = "row",
trans = mat_4_states,
keep = names(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep2)[-c(1:8)])
## Warning in msprepEngine(time = time, status = status, id = id, starttime =
## starttime, : Starting from state 1, simultaneous transitions possible for
## subjects 36666 36586 56465 136847 37595 60609 51706 76376 117544 140210 at times
## 126 472 32 36 1 203 45 14 5 71; smallest receiving state chosen
ms_CONS_C1_SEP_2020_women_imputed[, c("Tstart", "Tstop", "time")] <- ms_CONS_C1_SEP_2020_women_imputed[, c("Tstart", "Tstop", "time")]/365.25
ms2_CONS_C1_SEP_2020_women_imputed[, c("Tstart", "Tstop", "time")] <- ms2_CONS_C1_SEP_2020_women_imputed[, c("Tstart", "Tstop", "time")]/365.25
#Starting from state 1, simultaneous transitions possible for subjects 36666 36586 56465 136847 37595 60609 51706 76376 117544 140210 at times 126 472 32 36 1 203 45 14 5 71; smallest receiving state chosen
invisible(c("Investigate warning"))
data.frame(events(ms_CONS_C1_SEP_2020_women_imputed)$Frequencies) %>%
dplyr::filter(to!="total entering") %>%
left_join(data.frame(events(ms_CONS_C1_SEP_2020_women_imputed)$Proportions), by=c("from", "to")) %>%
dplyr::rename("Frequencies"="Freq.x", "Proportions"="Freq.y") %>%
dplyr::arrange(from, to) %>%
dplyr::mutate(diff=ifelse(as.character(from)!=as.character(to),0,1)) %>%
dplyr::filter(diff==0) %>%
dplyr::select(-diff) %>%
dplyr::mutate(Proportions=scales::percent(Proportions)) %>%
knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption="Table 5. Empirical State Transition Matrix, 3 State Model",
align= c("c",rep('c', 5)))%>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 11)%>%
kableExtra::add_footnote("Note= Proportions from the initial state") %>%
kableExtra::scroll_box(width = "100%", height = "350px")
Table 5. Empirical State Transition Matrix, 3 State Model
|
from
|
to
|
Frequencies
|
Proportions
|
|
Admission
|
Therapeutic Discharge
|
4,599
|
21.5%
|
|
Admission
|
Readmission
|
4,878
|
22.8%
|
|
Admission
|
no event
|
11,946
|
55.8%
|
|
Therapeutic Discharge
|
Admission
|
0
|
0.0%
|
|
Therapeutic Discharge
|
Readmission
|
538
|
11.7%
|
|
Therapeutic Discharge
|
no event
|
4,061
|
88.3%
|
|
Readmission
|
Admission
|
0
|
0.0%
|
|
Readmission
|
Therapeutic Discharge
|
0
|
0.0%
|
|
Readmission
|
no event
|
5,416
|
100.0%
|
|
a Note= Proportions from the initial state
|
data.frame(events(ms2_CONS_C1_SEP_2020_women_imputed)$Frequencies) %>%
dplyr::filter(to!="total entering") %>%
left_join(data.frame(events(ms2_CONS_C1_SEP_2020_women_imputed)$Proportions), by=c("from", "to")) %>%
dplyr::rename("Frequencies"="Freq.x", "Proportions"="Freq.y") %>%
dplyr::arrange(from, to) %>%
dplyr::mutate(diff=ifelse(as.character(from)!=as.character(to),0,1)) %>%
dplyr::filter(diff==0) %>%
dplyr::select(-diff) %>%
dplyr::mutate(Proportions=scales::percent(Proportions)) %>%
knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption="Table 6. Empirical State Transition Matrix, 3 State Model",
align= c("c",rep('c', 5)))%>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 11)%>%
kableExtra::add_footnote("Note= Proportions from the initial state")%>%
kableExtra::scroll_box(width = "100%", height = "350px")
Table 6. Empirical State Transition Matrix, 3 State Model
|
from
|
to
|
Frequencies
|
Proportions
|
|
Admission
|
Therapeutic Discharge
|
4,599
|
21.47%
|
|
Admission
|
Discharge Without Medical Advice
|
10,840
|
50.60%
|
|
Admission
|
Readmission
|
2,459
|
11.48%
|
|
Admission
|
no event
|
3,525
|
16.45%
|
|
Therapeutic Discharge
|
Admission
|
0
|
0.00%
|
|
Therapeutic Discharge
|
Discharge Without Medical Advice
|
0
|
0.00%
|
|
Therapeutic Discharge
|
Readmission
|
538
|
11.70%
|
|
Therapeutic Discharge
|
no event
|
4,061
|
88.30%
|
|
Discharge Without Medical Advice
|
Admission
|
0
|
0.00%
|
|
Discharge Without Medical Advice
|
Therapeutic Discharge
|
0
|
0.00%
|
|
Discharge Without Medical Advice
|
Readmission
|
2,419
|
22.32%
|
|
Discharge Without Medical Advice
|
no event
|
8,421
|
77.68%
|
|
Readmission
|
Admission
|
0
|
0.00%
|
|
Readmission
|
Therapeutic Discharge
|
0
|
0.00%
|
|
Readmission
|
Discharge Without Medical Advice
|
0
|
0.00%
|
|
Readmission
|
no event
|
5,416
|
100.00%
|
|
a Note= Proportions from the initial state
|
Consideration of the Appropriateness of the proportional hazards assumption
#reescribo el tipo de programa para indicar 0 y 1s para tratamiento vs. control
if(dimnames(table(ms_CONS_C1_SEP_2020_women_imputed$tipo_de_programa_2))[[1]][1]=="General population"){
ms_CONS_C1_SEP_2020_women_imputed$tipo_de_programa_2<-ifelse(ms_CONS_C1_SEP_2020_women_imputed$tipo_de_programa_2=="Women specific",1,0)
}
if(dimnames(table(ms2_CONS_C1_SEP_2020_women_imputed$tipo_de_programa_2))[[1]][1]=="General population"){
ms2_CONS_C1_SEP_2020_women_imputed$tipo_de_programa_2<-ifelse(ms2_CONS_C1_SEP_2020_women_imputed$tipo_de_programa_2=="Women specific",1,0)
}
#podemos reemplazar tiempo por años.
ms_CONS_C1_SEP_2020_women_imputed$years <- ms_CONS_C1_SEP_2020_women_imputed$time/365.25
ms2_CONS_C1_SEP_2020_women_imputed$years <- ms2_CONS_C1_SEP_2020_women_imputed$time/365.25
# ’expand.covs()’ permits to define the set of covariates which impacts
# each transition:
#NO SE SI CONVIENE TRAER ESTA FUNCION
#data_mstate <- expand.covs(ms2_CONS_C1_SEP_2020_women_imputed, names(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep)[-c(1:6)], append = TRUE, longnames = FALSE)
plots<- data.frame(title=rep(c("(a) Admission -> Ther. Disch.","(b) Admission -> Readmission","(c) Ther. Disch. -> Readmission"),1),trans=rep(1:3,1))
## SIN COVARIABLES
par(mfrow=c(3,2))
for(i in c(1:3)){
plot(log(survfit(Surv(years,status)~ 1, data=subset(ms_CONS_C1_SEP_2020_women_imputed, trans==plots[i,"trans"] & tipo_de_programa_2==0))$time),
log(-log(survfit(Surv(years,status)~ 1, data=subset(ms_CONS_C1_SEP_2020_women_imputed, trans==plots[i,"trans"] & tipo_de_programa_2==0))$surv)), type="l",
xlab="log(time)", ylab="", xaxs="i",yaxs="i",
las=1,cex.lab=1.5, cex.axis=1.5)
lines(log(survfit(Surv(years,status)~ 1, data=subset(ms_CONS_C1_SEP_2020_women_imputed, trans==plots[i,"trans"] & tipo_de_programa_2==1))$time),
log(-log(survfit(Surv(years,status)~ 1, data=subset(ms_CONS_C1_SEP_2020_women_imputed, trans==plots[i,"trans"] & tipo_de_programa_2==1))$surv)), lty=2)
legend(-6,-4, c("GP", "WE"), bty="n", lty=c(2,1), cex=1.5)
title(main=paste0("LOG CUMULATIVE HAZARD VS LOG TIME PLOT \n",plots[i,"title"]), cex.main=1.5)
}
for(i in c(1:3)){
plot(survfit(Surv(years,status)~ 1, data=subset(ms_CONS_C1_SEP_2020_women_imputed, trans==plots[i,"trans"] & tipo_de_programa_2==0))$time,
-log(survfit(Surv(years,status)~ 1, data=subset(ms_CONS_C1_SEP_2020_women_imputed, trans==plots[i,"trans"] & tipo_de_programa_2==0))$surv), type="l",
xlab="time", ylab="", xaxs="i",yaxs="i",
las=1,cex.lab=1.5, cex.axis=1.5, col=1)
lines(survfit(Surv(years,status)~ 1, data=subset(ms_CONS_C1_SEP_2020_women_imputed, trans==plots[i,"trans"] & tipo_de_programa_2==1))$time,
-log(survfit(Surv(years,status)~ 1, data=subset(ms_CONS_C1_SEP_2020_women_imputed, trans==plots[i,"trans"] & tipo_de_programa_2==1))$surv), lty=2)
legend(.01,.1, c("GP", "WE"), bty="n", lty=c(2,1), cex=1.5)
title(main=paste0("CUMULATIVE HAZARD PLOT: -LOG(KM SURVIVAL) \n",plots[i,"title"]), cex.main=1.5)
}
Decision whether to use Markov or Semi-Markov
Assessment of Fit Transitions
Calculations State Occupancy Probabilities